In [1]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Input, concatenate
from ml_logic.preprocessor import data_balancing, create_augmented_model
from ml_logic.preprocessor import load_and_preprocess_image


In [2]:
train = pd.read_csv('../data/RFMiD_Training_Labels.csv').set_index('ID')
test = pd.read_csv('../data/RFMiD_Testing_Labels.csv').set_index('ID')
eval = pd.read_csv('../data/RFMiD_Validation_Labels.csv').set_index('ID')


In [16]:
# X_train = train.drop(columns='Disease_Risk')
X_train = data_balancing(table_link='../data/')
# y_train = train['Disease_Risk']
y_train = X_train
X_eval  = eval.drop(columns='Disease_Risk')
y_eval = eval['Disease_Risk']


In [4]:
image_folder = '../data/training_images'
images = np.array([load_and_preprocess_image(row_id, image_folder) for row_id in X_train.index])
# images = np.array([load_and_preprocess_image(X_train.iloc[0].ID, image_folder)])
images.shape


(802, 224, 224, 3)

In [5]:
eval_image_folder = '../data/eval_images'
eval_images = np.array([load_and_preprocess_image(row_id, image_folder) for row_id in X_eval.index])


In [20]:
image_input = Input(shape=(224, 224, 3))

x = Conv2D(32, (3, 3), activation='relu')(image_input)
x = MaxPooling2D(2, 2)(x)
x = Flatten()(x)


#  NOTE: row_input would cause data leakage so it's been commented out.

# row_input = Input(shape=(X_train.shape[1],))
# y = Dense(64, activation='relu')(row_input)

# combined = concatenate([x, y])

z = Dense(12, activation='relu')(x)
z = Dense(64, activation='relu')(z)
z = Dense(1, activation='sigmoid')(z)


model = Model(inputs=image_input, outputs=z)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [21]:
model.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d_1 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 111, 111, 32)      0         
 g2D)                                                            
                                                                 
 flatten_1 (Flatten)         (None, 394272)            0         
                                                                 
 dense_3 (Dense)             (None, 12)                4731276   
                                                                 
 dense_4 (Dense)             (None, 64)                832       
                                                           

In [18]:
model.fit(
    images, y_train,
    validation_data=(eval_images, y_eval),
    epochs=10,
    batch_size=32
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2979c6e00>

In [19]:
model.evaluate(eval_images, y_eval)




[0.6920269727706909, 0.7906249761581421]