In [28]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [29]:
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv',index_col=0)

In [30]:
train[train.isna().any(axis=1)]

Unnamed: 0_level_0,MO HLADR+ MFI (cells/ul),Neu CD64+MFI (cells/ul),CD3+T (cells/ul),CD8+T (cells/ul),CD4+T (cells/ul),NK (cells/ul),CD19+ (cells/ul),CD45+ (cells/ul),Age,Sex 0M1F,Mono CD64+MFI (cells/ul),label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
39,,,1336.54,739.71,550.3,68.46,192.07,1615.68,21,0,,0


In [31]:
test[test.isna().any(axis=1)]

Unnamed: 0_level_0,MO HLADR+ MFI (cells/ul),Neu CD64+MFI (cells/ul),CD3+T (cells/ul),CD8+T (cells/ul),CD4+T (cells/ul),NK (cells/ul),CD19+ (cells/ul),CD45+ (cells/ul),Age,Sex 0M1F,Mono CD64+MFI (cells/ul)
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [3]:
train.fillna(train.mean(), inplace=True)

In [4]:
train = shuffle(train, random_state=5001)

In [5]:
train.reset_index(inplace=True,drop=True)

In [6]:
y_train = train.pop("label")
x_train = train

In [7]:
y_train.value_counts()

0    58
1    29
Name: label, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(x_train,y_train, test_size=0.2, random_state=5001)

In [9]:
print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)

(69, 11) (69,) (18, 11) (18,)


In [10]:
# configurations
lr = 0.01
batch_size = 8
epochs = 15

In [11]:
x_train.shape

(87, 11)

In [12]:
normalizer = tf.keras.layers.experimental.preprocessing.Normalization(axis=-1)

In [13]:
normalizer.adapt(np.array(x_train))

In [14]:
model = tf.keras.Sequential([
    normalizer,
    tf.keras.Input(shape=(11,)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.1),
    # layers.BatchNormalization(),
    # layers.Dense(16, activation='relu'),
    # layers.Dropout(0.1),
    layers.Dense(1)
])
checkpoint_filepath = "model_weights.hdf5"
optimizer = Adam(learning_rate=lr)
checkpointer = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True)
criterion = tf.losses.BinaryCrossentropy(from_logits=True)

model.compile(loss = criterion,
             optimizer = optimizer,
             metrics=['accuracy'])

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization (None, 11)                23        
_________________________________________________________________
input_1 (InputLayer)         multiple                  0         
_________________________________________________________________
dense (Dense)                (None, 64)                768       
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 856
Trainable params: 833
Non-trainable params: 23
_________________________________________________________________


In [16]:
history = model.fit(X_train,y_train,epochs=epochs,validation_data=(X_test,y_test),batch_size=batch_size,callbacks=[checkpointer])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
model.load_weights(checkpoint_filepath)

In [18]:
model.evaluate(X_test,y_test)



[0.2621140778064728, 0.9444444179534912]

In [19]:
y_pred = model.predict(X_test)

In [20]:
y_pred

array([[-2.5328207 ],
       [-1.9307722 ],
       [-2.8147402 ],
       [ 3.0649352 ],
       [ 0.3405013 ],
       [-3.1917264 ],
       [19.39797   ],
       [-3.1704817 ],
       [ 0.54546916],
       [-1.7840283 ],
       [-1.9195632 ],
       [-2.8315578 ],
       [-3.3995829 ],
       [ 0.6168875 ],
       [-5.0360856 ],
       [-1.6847272 ],
       [-3.4277778 ],
       [-3.09892   ]], dtype=float32)

In [21]:
y_test

83    0
49    0
2     0
20    1
53    0
11    0
19    1
0     0
15    1
62    0
51    1
61    0
72    0
38    1
24    0
23    0
80    0
84    0
Name: label, dtype: int64

In [22]:
pred_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Activation('sigmoid')
])
pred_model.compile()

In [23]:
y_sigmoid = pred_model(X_test)
y_sigmoid

<tf.Tensor: shape=(18, 1), dtype=float32, numpy=
array([[0.07358912],
       [0.12666512],
       [0.0565328 ],
       [0.955423  ],
       [0.5843123 ],
       [0.03947824],
       [1.        ],
       [0.04029176],
       [0.6330837 ],
       [0.14380646],
       [0.12791026],
       [0.05564246],
       [0.03230852],
       [0.6495104 ],
       [0.00645715],
       [0.1564705 ],
       [0.03143853],
       [0.04315183]], dtype=float32)>

In [24]:
threshold = 0.5

In [25]:
output = tf.squeeze(tf.where(y_sigmoid > threshold, 1, 0),-1)
output

<tf.Tensor: shape=(18,), dtype=int32, numpy=array([0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0])>

In [26]:
np.mean(np.equal(output,y_test))

0.8888888888888888

In [27]:
print("accuracy:")
print(accuracy_score(y_test, output))
print("classification report:")
print(classification_report(y_test, output))
print("confusion matrix:")
print(confusion_matrix(y_test, output))

accuracy:
0.8888888888888888
classification report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92        13
           1       0.80      0.80      0.80         5

    accuracy                           0.89        18
   macro avg       0.86      0.86      0.86        18
weighted avg       0.89      0.89      0.89        18

confusion matrix:
[[12  1]
 [ 1  4]]
