In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import cv2
import os
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Model,Sequential, Input, load_model
from keras.layers import *
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras import metrics

from tensorflow.keras.applications.inception_v3 import InceptionV3
from itertools import combinations as comb
import ntpath
import glob
import math
from sklearn import metrics
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import OneHotEncoder 

In [None]:
# Class sub-folder names. The position in this list becomes the numeric
# DiseaseID when the file table is built (0 -> 'COVID', 1 -> 'non-COVID').
disease_types=['COVID', 'non-COVID']
# Root folder of the dataset, relative to the notebook's working directory.
data_dir = '../input/sarscov2-ctscan-dataset/'
# os.path.join with a single argument returns it unchanged, so train_dir
# points at the same folder as data_dir.
train_dir = os.path.join(data_dir)

In [None]:
# Build one row per image file: relative path, numeric class id, class name.
train_data = []
for defects_id, sp in enumerate(disease_types):
    # os.listdir returns entries in an arbitrary, filesystem-dependent order.
    # Sorting makes the table (and therefore the seeded shuffle and the CV
    # splits derived from it) identical on every run and every machine.
    for file in sorted(os.listdir(os.path.join(train_dir, sp))):
        train_data.append(['{}/{}'.format(sp, file), defects_id, sp])

train = pd.DataFrame(train_data, columns=['File', 'DiseaseID','Disease Type'])
train.head()

In [None]:
# Fixed seed so the shuffle below is repeatable.
SEED = 42
# frac=1 samples every row, i.e. returns the whole frame in shuffled order.
# NOTE(review): this overwrites `train`, so re-running the cell shuffles the
# already-shuffled frame again.
train = train.sample(frac=1, random_state=SEED) 
train.index = np.arange(len(train)) # Reset indices
train.head()

## Image Read and Resize Function

In [None]:
IMAGE_SIZE = 128


def read_image(filepath):
    """Load the image stored at ``filepath`` (relative to ``data_dir``) with OpenCV.

    The result of ``cv2.imread`` is returned as-is; callers in this notebook
    check it against ``None`` before using it.
    """
    full_path = os.path.join(data_dir, filepath)
    # No flag is passed, so OpenCV's default (colour) loading mode is used.
    return cv2.imread(full_path)


def resize_image(image, image_size):
    """Return a resized copy of ``image``.

    ``image_size`` is forwarded to ``cv2.resize`` as its target-size argument,
    and ``cv2.INTER_AREA`` interpolation is used.
    """
    working_copy = image.copy()
    return cv2.resize(working_copy, image_size, interpolation=cv2.INTER_AREA)

## Training Images

In [None]:
# Pre-allocate the image tensor. float32 halves the memory footprint compared
# with NumPy's float64 default; X is only ever fed to the Keras model.
X = np.zeros((train.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
unreadable_files = []
# enumerate() hides the length from tqdm, so pass total= to get a real progress bar.
for i, file in tqdm(enumerate(train['File'].values), total=train.shape[0]):
    image = read_image(file)
    if image is None:
        # Keep track of files that could not be decoded instead of silently
        # leaving an all-zero image in the training tensor.
        unreadable_files.append(file)
        continue
    X[i] = resize_image(image, (IMAGE_SIZE, IMAGE_SIZE))
if unreadable_files:
    print('Warning: {} image(s) could not be read and remain all-zero, e.g. {}'.format(
        len(unreadable_files), unreadable_files[:5]))
# Normalize the data to [0, 1] (in place, to avoid a second full-size copy)
X /= 255.
print('Train Shape: {}'.format(X.shape))

In [None]:
from sklearn.model_selection import RepeatedKFold
# 2-fold cross-validation repeated 5 times (10 train/test splits in total);
# random_state fixes the splits.
cv = RepeatedKFold(n_splits=2, n_repeats=5, random_state=42)

In [None]:
# NOTE: this cell relies on Y, Inceptionv3, incept_rf and generate_imp_space,
# which are defined in cells further down the notebook; run those cells first.
k=0  # fold counter; forwarded to incept_rf
acc_imp=[]
acc_norm=[]
acc_incept=[]  # per-fold accuracy of the fine-tuned network itself

imp_acc=[]  # per-fold accuracy of the ensemble on the imp-augmented features
nor_acc=[]  # per-fold accuracy of the ensemble on the plain network features

for train_index, test_index in cv.split(X,Y):
    estimators_imp=[]
    estimators = []
    X_train = X[train_index]
    X_test = X[test_index]
    Y_train = Y[train_index]
    Y_test = Y[test_index]

    imp_feature_size=1
    foz=4
    n_estimators=3
    num_class = 1  # the network is built with a single sigmoid output unit
    imp_tree_predicts = []
    tree_predicts = []

    # Fine-tune a fresh network on this fold. The held-out half is also passed
    # as validation data; early stopping watches the training loss.
    base_model,modely=Inceptionv3(num_class)
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)
    history = modely.fit(X_train, Y_train,epochs=15,batch_size=32, validation_data=(X_test, Y_test),callbacks=[callback])
    pred= modely.predict(X_test)
    if pred.ndim > 1 and pred.shape[1] > 1:
        predicted_class = np.argmax(pred, axis=1)
    else:
        # With one sigmoid unit, argmax over the single column is always 0,
        # which would score every sample as class 0. Threshold the
        # probability instead.
        predicted_class = (np.ravel(pred) > 0.5).astype(int)

    # Features taken from the network's flatten layer feed the tree ensembles.
    X_train_features,X_valid_features=incept_rf(base_model,modely,X_train,X_test,k)
    X_train_features = pd.DataFrame(X_train_features)
    X_valid_features = pd.DataFrame(X_valid_features)
    d=len(X_train_features.columns)
    # Meta-learner parameter for the plain feature space (does not change per estimator).
    sel_d = 2*round(math.log2(d))
    for i in range(0,n_estimators):
        imp_tr, imp_ts = generate_imp_space(X_train_features, Y_train, X_valid_features, imp_feature_size, foz)
        imp_d = imp_tr.shape[1]

        # Meta-learner parameter for the augmented feature space.
        imp_sel_d = 2* round(math.log2(imp_d))

        imp_tree = GradientBoostingClassifier(max_features=imp_sel_d, n_estimators=50,random_state=42)
        estimators_imp.append(('imp_rf'+str(i), imp_tree))
        imp_tree.fit(imp_tr, Y_train)
        imp_tree_predicts.append(imp_tree.predict(imp_ts))

        tree = GradientBoostingClassifier(max_features=sel_d, n_estimators=50,random_state=42)
        estimators.append(('nor_rf'+str(i), tree))
        tree.fit(X_train_features, Y_train)
        tree_predicts.append(tree.predict(X_valid_features))

    # NOTE(review): VotingClassifier.fit re-fits clones of every estimator on
    # the data passed here, i.e. all 'imp' members are trained on the LAST
    # generated imp space only, with identical hyper-parameters and seed.
    # If one model per random imp space is intended, combine
    # imp_tree_predicts / tree_predicts with MajorityVoting instead.
    voting_imp = VotingClassifier(estimators=estimators_imp)
    voting_imp.fit(imp_tr, Y_train)
    imp_pre=voting_imp.predict(imp_ts)
    voting_norm = VotingClassifier(estimators=estimators)
    voting_norm.fit(X_train_features, Y_train)
    imp_norm=voting_norm.predict(X_valid_features)

    imp_acc.append(metrics.accuracy_score(Y_test, imp_pre))
    nor_acc.append(metrics.accuracy_score(Y_test, imp_norm))
    acc_incept.append(metrics.accuracy_score(Y_test, predicted_class))
    k+=1
                                                        
                                        
    

In [None]:
# Draw the architecture of `modely`, i.e. the network built in the most
# recently executed iteration of the cross-validation loop.
tf.keras.utils.plot_model(modely)

In [None]:
# Mean accuracy over all cross-validation folds for the three evaluated models.
print("Sars-Cov2")
# Voting ensemble trained on the imp-augmented feature space.
print(np.mean(imp_acc))
# Voting ensemble trained on the plain network features.
print(np.mean(nor_acc))
# Accuracy computed from the network's own predictions.
print(np.mean(acc_incept))

In [None]:
# Number of images per class id (class balance check).
train['DiseaseID'].value_counts()

In [None]:
# Label vector (numeric class ids) aligned row-by-row with `train`.
# NOTE(review): the cross-validation cell higher up in the notebook already
# uses Y, so this cell has to be executed before it.
Y = train['DiseaseID'].values


In [None]:
def incept_rf(base_model,modely,train_imgs,test_imgs,i):
  """Extract flatten-layer activations of ``modely`` for two image sets.

  Args:
    base_model: backbone whose ``input`` tensor is used as the extractor input.
    modely: full model containing the Flatten layer to read features from.
    train_imgs: images for which training features are computed.
    test_imgs: images for which validation/test features are computed.
    i: index of the model within the session. Only used as a fallback to
      rebuild the auto-generated layer name ('flatten', 'flatten_1', ...)
      when no Flatten layer can be found by type.

  Returns:
    Tuple ``(X_train_features, X_valid_features)`` with the extractor's
    predictions for ``train_imgs`` and ``test_imgs``.
  """
  # Locate the Flatten layer by type rather than by its auto-generated name:
  # the numeric name suffix depends on how many Flatten layers were created in
  # the session, so a name built from the fold counter breaks as soon as the
  # cell is re-run or another model is built in between.
  flatten_layers = [layer for layer in modely.layers
                    if isinstance(layer, tf.keras.layers.Flatten)]
  if flatten_layers:
    feature_layer = flatten_layers[-1]
  else:
    # Fallback: original name-based lookup.
    feature_layer = modely.get_layer('flatten' if i == 0 else 'flatten_' + str(i))

  feature_network = Model(base_model.input, feature_layer.output)
  X_train_features = feature_network.predict(train_imgs)
  X_valid_features = feature_network.predict(test_imgs)
  return X_train_features,X_valid_features


## InceptionV3

In [None]:
def Inceptionv3(class_no):
  """Build and compile an InceptionV3 backbone with a small dense head.

  The backbone is created without its top, with ImageNet weights and a
  128x128x3 input, and is left fully trainable. The head is
  MaxPooling2D -> Flatten -> Dropout(0.5) -> Dense(512, relu, l2) ->
  Dropout(0.5) -> Dense(class_no, sigmoid, l2). The model is compiled with
  Adam (learning rate 1e-4), binary cross-entropy loss and the 'acc' metric.

  Args:
    class_no: number of units in the final sigmoid layer.

  Returns:
    Tuple ``(backbone, compiled_model)``.
  """
  print("-------------------------------------Inceptionv3--------------------------------------------")
  layers = tf.keras.layers

  backbone = tf.keras.applications.InceptionV3(
      include_top=False,
      weights="imagenet",
      input_shape=(128, 128, 3),
  )
  backbone.trainable = True

  # Classification head stacked on the backbone output.
  head = layers.MaxPooling2D()(backbone.output)
  head = layers.Flatten()(head)
  head = layers.Dropout(0.5)(head)
  head = layers.Dense(512, activation='relu', kernel_regularizer='l2')(head)
  head = layers.Dropout(0.5)(head)
  prediction = layers.Dense(units=class_no, activation='sigmoid', kernel_regularizer='l2')(head)

  classifier = tf.keras.models.Model(backbone.input, outputs=prediction)
  classifier.compile(
      optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
      loss='binary_crossentropy',
      metrics=['acc'],
  )
  return backbone, classifier

In [None]:
def generate_imp_space(X_train, Y_train, X_test, imp_feature_size, foz):
  """Augment a feature space with one-vs-rest quadratic regression outputs.

  For ``imp_feature_size * foz`` rounds the feature columns are randomly
  permuted and cut into consecutive groups of ``foz`` columns. For every
  group one class is drawn at random, a least-squares quadratic model
  (see ``x2fx``) is fitted to separate that class (+1) from all other
  samples (-1), and the model's output is appended as one new column to both
  the training and the test data.

  Args:
    X_train: DataFrame of training features, one row per sample.
    Y_train: class label per training row (array-like or single-column frame).
    X_test: DataFrame of test features with the same columns as ``X_train``.
    imp_feature_size: multiplier for the number of permutation rounds.
    foz: number of original features combined into each new feature.

  Returns:
    Tuple ``(imp_train_data, imp_test_data)``: NumPy arrays containing the
    original columns followed by the generated columns.

  Raises:
    ValueError: if ``Y_train`` does not hold exactly one label per row.
  """
  train_values = X_train.values
  test_values = X_test.values
  labels = np.asarray(Y_train).ravel()
  if labels.shape[0] != train_values.shape[0]:
    raise ValueError('Y_train must contain exactly one label per row of X_train')
  # Draw the "positive" class from the labels that are actually present
  # instead of relying on a global class count.
  classes = np.unique(labels)
  d = train_values.shape[1]

  # Collect new columns in lists and concatenate once at the end.
  train_blocks = [train_values]
  test_blocks = [test_values]
  for _ in range(imp_feature_size * foz):
    Xindis = np.random.permutation(d)
    for j in range(0, d - (foz - 1), foz):  # runs d // foz times
      cols = Xindis[j:j + foz]
      s1 = classes[np.random.randint(len(classes))]
      # Positional boolean mask: s1 vs. all other classes. (Indexing a label
      # DataFrame with a boolean DataFrame keeps every row, so an index-based
      # split would put all samples into the positive group.)
      targets = np.where(labels == s1, 1, -1).reshape(-1, 1)

      design = x2fx(train_values[:, cols])
      # Least-squares weights via the pseudo-inverse of the normal equations.
      W = np.linalg.pinv(design.T @ design) @ design.T @ targets

      train_blocks.append(design @ W)
      test_blocks.append(x2fx(test_values[:, cols]) @ W)

  imp_train_data = np.concatenate(train_blocks, axis=1)
  imp_test_data = np.concatenate(test_blocks, axis=1)
  return imp_train_data,imp_test_data
  

In [None]:
def x2fx(x, model='quadratic'):
    """Expand a predictor matrix into a regression design matrix.

    Args:
        x: array-like of shape (n_samples, n_features). A 1-D input is
            treated as a single feature column.
        model: which terms to generate:
            'linear'        -> [1, x]
            'purequadratic' -> [1, x, x**2]
            'interaction'   -> [1, x, x_i * x_j for i < j]
            'quadratic'     -> [1, x, x_i * x_j for i < j, x**2]

    Returns:
        2-D NumPy array with one row per sample and the columns listed above.

    Raises:
        ValueError: if ``model`` is not one of the four supported names.
    """
    supported = ('linear', 'purequadratic', 'interaction', 'quadratic')
    if model not in supported:
        raise ValueError('unknown model {!r}; expected one of {}'.format(model, supported))

    x = np.asarray(x)
    if x.ndim == 1:
        x = x[:, None]

    linear = np.c_[np.ones(x.shape[0]), x]
    if model == 'linear':
        return linear
    if model == 'purequadratic':
        return np.c_[linear, x**2]

    pairs = list(comb(range(x.shape[1]), 2))
    if pairs:
        interaction = np.column_stack([x[:, i] * x[:, j] for i, j in pairs])
    else:
        # A single feature has no pairwise products; use an (n, 0) block so
        # the column concatenation below still works.
        interaction = np.empty((x.shape[0], 0))

    if model == 'interaction':
        return np.c_[linear, interaction]
    return np.c_[linear, interaction, x**2]


In [None]:
def MajorityVoting(votes):
  """Return the most frequent vote in every column of ``votes``.

  ``votes`` is a 2-D array with one row per voter and one column per sample.
  When several values are equally frequent, the smallest one wins, because
  ``np.unique`` returns sorted values and ``np.argmax`` picks the first maximum.
  """
  def _winner(column):
    labels, tallies = np.unique(column, return_counts=True)
    return labels[np.argmax(tallies)]

  return np.array([_winner(votes[:, col]) for col in range(votes.shape[1])])

In [None]:
# Stand-alone experiment: random-forest ensembles on the plain and the
# imp-augmented feature space, combined with MajorityVoting.
# NOTE(review): this cell reads X_train_features, X_valid_features and Y_train,
# which in the visible notebook are only assigned inside the cross-validation
# loop, so it works on whatever the last executed fold left in the kernel.
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from itertools import combinations as comb
import ntpath
import glob
import os
import math
imp_feature_size=1
foz=4
n_estimators=5
imp_tree_predicts = []
tree_predicts = []
accuracies = []
accuracies_imp = []  
X_train_features = pd.DataFrame(X_train_features)
X_valid_features = pd.DataFrame(X_valid_features)
# Number of distinct labels in the current training fold.
num_class = len(np.unique(Y_train))
d=len(X_train_features.columns)
for i in range(0,n_estimators):
      # A new random imp space is generated for every ensemble member.
      imp_tr, imp_ts = generate_imp_space(X_train_features, Y_train, X_valid_features, imp_feature_size, foz)
      imp_d = imp_tr.shape[1]

      #meta learner params
      imp_sel_d = 2* round(math.log2(imp_d)) #feature
      sel_d = 2*round(math.log2(d))
      
      imp_tree = RandomForestClassifier(max_features=imp_sel_d, n_estimators=50,random_state=42)#,random_state=42
      imp_tree.fit(imp_tr, Y_train)
      imp_tree_predicts.append(imp_tree.predict(imp_ts))


      tree = RandomForestClassifier(max_features=sel_d, n_estimators=50,random_state=42)#, random_state=42
      tree.fit(X_train_features, Y_train)
      tree_predicts.append(tree.predict(X_valid_features))

# Combine the per-member predictions column-wise by majority vote.
results_imp = MajorityVoting(np.array(imp_tree_predicts))
results = MajorityVoting(np.array(tree_predicts))
# NOTE(review): Y_val is not assigned anywhere in the visible notebook, and the
# argmax over axis 1 presumably expects one-hot encoded labels — confirm where
# Y_val comes from before running this cell.
act=np.argmax(Y_val,axis=1)
    # print("--------------  {}  ----------------".format(ds.upper()))
accuracies.append(metrics.accuracy_score(act, results))
accuracies_imp.append(metrics.accuracy_score(act, results_imp))