In [1]:
import numpy as np
import numpy.random as nprand
import matplotlib.pyplot as plt
import pandas as pd
import math as math

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif 
from sklearn.model_selection import cross_val_score

Data Loading and preprocessing:

In [2]:
# The original data = df
df = pd.read_csv('data.csv',
    header=None,
    index_col=False).T

Select labels, light and feature

In [3]:
lesions=df.iloc[:,1].apply(pd.to_numeric)
light=df.iloc[:,2].apply(pd.to_numeric)
features=df.iloc[:,3:-1].apply(pd.to_numeric)


In [4]:
from sklearn.preprocessing import OneHotEncoder
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = lesions.values.reshape(len(lesions), 1)
lesions_encoded = onehot_encoder.fit_transform(integer_encoded)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, lesions_encoded, test_size=0.2,stratify=lesions_encoded)


In [192]:
import keras.layers as layers
from keras.models import Model
import tensorflow as tf

def normalize_with_moments(x, axes=[0, 1], epsilon=1e-8):
    mean, variance = tf.nn.moments(x, axes=axes)
    x_normed = (x - mean) / tf.sqrt(variance + epsilon) # epsilon to avoid dividing by zero
    return x_normed

class calculateInteraction(layers.Layer):
    def __init__(self,featureSize):
        super(calculateInteraction, self).__init__()
        mask = tf.random_normal_initializer()
        self.mask = tf.Variable(
            initial_value=mask(shape=(1, featureSize), dtype="float32"),
            trainable=True,
        )

        randomIn = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=randomIn(shape=(featureSize, 13), dtype="float32"),
            trainable=True,
        )
    
    def call(self, x):
        Mx=tf.multiply(self.mask,x)
        Rxx=tf.matmul(Mx,Mx,transpose_b=True)
        Rxx = 1-normalize_with_moments(Rxx)
        xRxx=tf.matmul(Rxx,Mx)

        query_value_attention_seq = layers.Attention()([xRxx, Mx])
        query_value_attention_seq=tf.expand_dims(query_value_attention_seq,2)
        # query_value_attention_seq=normalize_with_moments(query_value_attention_seq)
        query_value_attention = layers.GlobalAveragePooling1D()(
            query_value_attention_seq)
        Mx=tf.expand_dims(Mx,2)
        query_value_attention_seq=tf.multiply(query_value_attention_seq,self.w)
        concatenated = layers.Concatenate()(
            [Mx, query_value_attention_seq])
        
        print(concatenated.shape)
        return concatenated

def getModel(inputSize):
    
    inputs = layers.Input(shape=inputSize)
    concatenated=calculateInteraction(inputs.shape[1])(inputs)
    concatenated=layers.Flatten()(concatenated)
    x=layers.Dense(64)(concatenated)
    x=layers.Dense(16)(x)
    output=layers.Dense(3,activation='softmax')(x)
    
    METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
     ]   
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=METRICS)
    model.summary()
    return model




In [193]:
total=lesions_encoded.shape[0]
c1=len(np.where(lesions_encoded[:,0]==1)[0])
c2=len(np.where(lesions_encoded[:,1]==1)[0])
c3=len(np.where(lesions_encoded[:,2]==1)[0])

model=getModel(X_train.shape[1])

batch_size=16
weight_for_0 = (batch_size / c1)*(total)/3.0 
weight_for_1 = (batch_size / c2)*(total)/3.0
weight_for_2 = (batch_size/ c3)*(total)/3.0

class_weight = {0: weight_for_0, 1: weight_for_1,2:weight_for_2}
es = tf.keras.callbacks.EarlyStopping(monitor='val_recall', mode='max', verbose=1,baseline=0.75,patience=699)
history=model.fit(X_train,y_train,epochs=1000,batch_size=batch_size,validation_split=0.2,class_weight=class_weight,callbacks=[es])

 9.5913 - accuracy: 0.9171 - precision: 0.8756 - recall: 0.8756 - auc: 0.9441 - val_loss: 3.1255 - val_accuracy: 0.7600 - val_precision: 0.6400 - val_recall: 0.6400 - val_auc: 0.7732
Epoch 641/1000
Epoch 642/1000
Epoch 643/1000
Epoch 644/1000
Epoch 645/1000
Epoch 646/1000
Epoch 647/1000
Epoch 648/1000
Epoch 649/1000
Epoch 650/1000
Epoch 651/1000
Epoch 652/1000
Epoch 653/1000
Epoch 654/1000
Epoch 655/1000
Epoch 656/1000
Epoch 657/1000
Epoch 658/1000
Epoch 659/1000
Epoch 660/1000
Epoch 661/1000
Epoch 662/1000
Epoch 663/1000
Epoch 664/1000
Epoch 665/1000
Epoch 666/1000
Epoch 667/1000
Epoch 668/1000
Epoch 669/1000
Epoch 670/1000
Epoch 671/1000
Epoch 672/1000
Epoch 673/1000
Epoch 674/1000
Epoch 675/1000
Epoch 676/1000
Epoch 677/1000
Epoch 678/1000
Epoch 679/1000
Epoch 680/1000
Epoch 681/1000
Epoch 682/1000
Epoch 683/1000
Epoch 684/1000
Epoch 685/1000
Epoch 686/1000
Epoch 687/1000
Epoch 688/1000
Epoch 689/1000
Epoch 690/1000
Epoch 691/1000
Epoch 692/1000
Epoch 693/1000
Epoch 694/1000
Epoch 6

In [194]:
from sklearn.metrics import classification_report

labelNames=['hyperplasic', 'serrated', 'adenoma']
y_output=model.predict(X_test)
y_output_transformed= onehot_encoder.inverse_transform(y_output)
y_test_transformed= onehot_encoder.inverse_transform(y_test)
print(classification_report(y_test_transformed, y_output_transformed, labels=[1,2,3]))

y_output_train=model.predict(X_train)
y_output_transformed_train= onehot_encoder.inverse_transform(y_output_train)
y_train_transformed= onehot_encoder.inverse_transform(y_train)



(None, 697, 14)
              precision    recall  f1-score   support

           1       0.90      1.00      0.95         9
           2       0.33      0.33      0.33         6
           3       0.73      0.69      0.71        16

    accuracy                           0.71        31
   macro avg       0.66      0.67      0.66        31
weighted avg       0.70      0.71      0.71        31

