In [1]:
import numpy as np
import numpy.random as nprand
import matplotlib.pyplot as plt
import pandas as pd
import math as math

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif 
from sklearn.model_selection import cross_val_score

Data Loading and preprocessing:

In [2]:
# The original data = df
df = pd.read_csv('data.csv',
    header=None,
    index_col=False).T

Select labels, light and feature

In [3]:
lesions=df.iloc[:,1].apply(pd.to_numeric)
light=df.iloc[:,2].apply(pd.to_numeric)
features=df.iloc[:,3:-1].apply(pd.to_numeric)


In [4]:
from sklearn.preprocessing import OneHotEncoder
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = lesions.values.reshape(len(lesions), 1)
lesions_encoded = onehot_encoder.fit_transform(integer_encoded)

In [48]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, lesions_encoded, test_size=0.2,stratify=lesions_encoded)


In [136]:
lesions.value_counts()

3    80
1    42
2    30
Name: 1, dtype: int64

In [129]:
import keras.layers as layers
from keras.models import Model
import tensorflow as tf

def normalize_with_moments(x, axes=[0, 1], epsilon=1e-8):
    mean, variance = tf.nn.moments(x, axes=axes)
    x_normed = (x - mean) / tf.sqrt(variance + epsilon) # epsilon to avoid dividing by zero
    return x_normed

class calculateInteraction(layers.Layer):
    def __init__(self,featureSize):
        super(calculateInteraction, self).__init__()
        mask = tf.random_normal_initializer()
        self.mask = tf.Variable(
            initial_value=mask(shape=(1, featureSize), dtype="float32"),
            trainable=True,
        )

        randomIn = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=randomIn(shape=(featureSize, 1), dtype="float32"),
            trainable=True,
        )
    
    def call(self, x):

        Mx=tf.multiply(self.mask,x)
        query_value_attention_seq = layers.Attention()([x, Mx])
        query_value_attention_seq=tf.expand_dims(query_value_attention_seq,2)
        query_value_attention_seq=normalize_with_moments(query_value_attention_seq)
        query_value_attention = layers.GlobalAveragePooling1D()(
            query_value_attention_seq)
        Mx=tf.expand_dims(Mx,2)
        # query_value_attention_seq=tf.multiply(query_value_attention_seq,self.w)
        concatenated = layers.Concatenate()(
            [Mx, query_value_attention_seq])
     
        print(concatenated.shape)
        return concatenated

def getModel(inputSize):
    
    inputs = layers.Input(shape=inputSize)
    x=layers.Dense(24,activation='relu')(inputs)
    concatenated=calculateInteraction(24)(x)
    concatenated=layers.Flatten()(concatenated)
    output=layers.Dense(3,activation='softmax')(concatenated)
    
    METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
     ]   
    model = Model(inputs=inputs, outputs=output)
    model.compile(tf.keras.optimizers.Adam(lr=0.001),loss='categorical_crossentropy',metrics=METRICS)
    model.summary()
    return model




In [130]:
total=lesions_encoded.shape[0]
c1=len(np.where(lesions_encoded[:,0]==1)[0])
c2=len(np.where(lesions_encoded[:,1]==1)[0])
c3=len(np.where(lesions_encoded[:,2]==1)[0])

model=getModel(X_train.shape[1])

batch_size=32
weight_for_0 = (batch_size / c1)*(total)/3.0 
weight_for_1 = (batch_size / c2)*(total)/3.0
weight_for_2 = (batch_size/ c3)*(total)/3.0
model.summary()

(None, 24, 2)
Model: "model_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_32 (InputLayer)        [(None, 697)]             0         
_________________________________________________________________
dense_51 (Dense)             (None, 24)                16752     
_________________________________________________________________
calculate_interaction_31 (ca (None, 24, 2)             48        
_________________________________________________________________
flatten_28 (Flatten)         (None, 48)                0         
_________________________________________________________________
dense_52 (Dense)             (None, 3)                 147       
Total params: 16,947
Trainable params: 16,947
Non-trainable params: 0
_________________________________________________________________
Model: "model_26"
_________________________________________________________________
Layer (type)          

In [131]:


class_weight = {0: weight_for_0, 1: weight_for_1,2:weight_for_2}
es = tf.keras.callbacks.EarlyStopping(monitor='val_recall', mode='max', verbose=1,baseline=0.75,patience=500)
history=model.fit(X_train,y_train,epochs=500,batch_size=batch_size,validation_split=0.2,class_weight=class_weight,callbacks=[es])

===] - 0s 13ms/step - loss: 37.3799 - accuracy: 0.8142 - precision: 0.7225 - recall: 0.7188 - auc: 0.8932 - val_loss: 1.5808 - val_accuracy: 0.7867 - val_precision: 0.6957 - val_recall: 0.6400 - val_auc: 0.8020
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch

In [132]:
from sklearn.metrics import classification_report

labelNames=['hyperplasic', 'serrated', 'adenoma']
y_output=model.predict(X_test)
y_output_transformed= onehot_encoder.inverse_transform(y_output)
y_test_transformed= onehot_encoder.inverse_transform(y_test)
print(classification_report(y_test_transformed, y_output_transformed, labels=[1,2,3]))

y_output_train=model.predict(X_train)
y_output_transformed_train= onehot_encoder.inverse_transform(y_output_train)
y_train_transformed= onehot_encoder.inverse_transform(y_train)
print(classification_report(y_train_transformed, y_output_transformed_train, labels=[1,2,3]))


(None, 24, 2)
              precision    recall  f1-score   support

           1       0.57      0.89      0.70         9
           2       0.38      0.50      0.43         6
           3       0.78      0.44      0.56        16

    accuracy                           0.58        31
   macro avg       0.57      0.61      0.56        31
weighted avg       0.64      0.58      0.57        31

              precision    recall  f1-score   support

           1       0.78      0.94      0.85        33
           2       0.53      0.83      0.65        24
           3       0.88      0.59      0.71        64

    accuracy                           0.74       121
   macro avg       0.73      0.79      0.73       121
weighted avg       0.78      0.74      0.74       121

