In [1]:
import numpy as np
import numpy.random as nprand
import matplotlib.pyplot as plt
import pandas as pd
import math as math

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif 
from sklearn.model_selection import cross_val_score

Data Loading and preprocessing:

In [2]:
# The original data = df
df = pd.read_csv('data.csv',
    header=None,
    index_col=False).T

Select labels, light and feature

In [3]:
lesions=df.iloc[:,1].apply(pd.to_numeric)
light=df.iloc[:,2].apply(pd.to_numeric)
features=df.iloc[:,3:-1].apply(pd.to_numeric)


In [4]:
from sklearn.preprocessing import OneHotEncoder
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = lesions.values.reshape(len(lesions), 1)
lesions_encoded = onehot_encoder.fit_transform(integer_encoded)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, lesions_encoded, test_size=0.2,stratify=lesions_encoded)


In [124]:
import keras.layers as layers
from keras.models import Model
import tensorflow as tf

class calculateInteraction(layers.Layer):
    def __init__(self,featureSize):
        super(calculateInteraction, self).__init__()
        mask = tf.zeros_initializer()
        self.mask = tf.Variable(
            initial_value=mask(shape=(1, featureSize), dtype="float32"),
            trainable=True,
        )
    
    def call(self, x):
        Mx=tf.math.multiply(self.mask,x)
        Rxx=tf.matmul(Mx,Mx,transpose_b=True)

        query_value_attention_seq = layers.Attention()([Rxx, Rxx])
        query_value_attention_seq=tf.expand_dims(query_value_attention_seq,2)
        query_value_attention = layers.GlobalAveragePooling1D()(
            query_value_attention_seq)

        concatenated = layers.Concatenate()(
            [Mx, query_value_attention])
        print(concatenated.shape)
        return concatenated

def getModel(inputSize):
    
    inputs = layers.Input(shape=inputSize)
    concatenated=calculateInteraction(inputs.shape[1])(inputs)
    concatenated=layers.Flatten()(concatenated)
    x=layers.Dense(32)(concatenated)
    x=layers.Dense(16)(x)
    output=layers.Dense(3,activation='softmax')(x)

    METRICS = [
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
     ]   
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer='nadam',loss='categorical_crossentropy',metrics=METRICS)
    model.summary()
    return model




In [139]:
total=lesions_encoded.shape[0]
c1=len(np.where(lesions_encoded[:,0]==1)[0])
c2=len(np.where(lesions_encoded[:,1]==1)[0])
c3=len(np.where(lesions_encoded[:,2]==1)[0])

model=getModel(X_train.shape[1])

weight_for_0 = (1 / c1)*(total)/3.0 
weight_for_1 = (1 / c2)*(total)/3.0
weight_for_2 = (1 / c3)*(total)/3.0

class_weight = {0: weight_for_0, 1: weight_for_1,2:weight_for_2}
class_weight
es = tf.keras.callbacks.EarlyStopping(monitor='val_recall', mode='max', verbose=1,baseline=0.85,patience=300)
history=model.fit(X_train,y_train,epochs=1000,batch_size=8,validation_split=0.2,class_weight=class_weight,callbacks=[es])

 0.5854 - accuracy: 0.8768 - precision: 0.8282 - recall: 0.7979 - auc: 0.9400 - val_loss: 0.6873 - val_accuracy: 0.8667 - val_precision: 0.8261 - val_recall: 0.7600 - val_auc: 0.8884
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch

In [140]:
from sklearn.metrics import classification_report

labelNames=['hyperplasic', 'serrated', 'adenoma']
y_output=model.predict(X_test)
y_output_transformed= onehot_encoder.inverse_transform(y_output)
y_test_transformed= onehot_encoder.inverse_transform(y_test)
print(classification_report(y_test_transformed, y_output_transformed, labels=[1,2,3]))

y_output_train=model.predict(X_train)
y_output_transformed_train= onehot_encoder.inverse_transform(y_output_train)
y_train_transformed= onehot_encoder.inverse_transform(y_train)



(None, 698)
              precision    recall  f1-score   support

           1       0.70      0.78      0.74         9
           2       0.38      0.83      0.53         6
           3       0.88      0.44      0.58        16

    accuracy                           0.61        31
   macro avg       0.65      0.68      0.62        31
weighted avg       0.73      0.61      0.62        31

