<ol>1. Loading the dataset</ol>
<ol>2. Pre-processing the data</ol>
<ol>3. Creating training and validation set</ol>
<ol>4. Defining the model architecture</ol>
<ol>5. Compiling the model</ol>
<ol>6. Training the model</ol>
<ol>7. Evaluating model performance</ol>

In [1]:
import tensorflow as tf
import keras
print('tensorflow version: ', tf.__version__)
print('keras version: ', keras.__version__)

tensorflow version:  2.13.0
keras version:  2.13.1


In [2]:
# import necessary libraries and functions
import os
import joblib
import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline

from keras.layers import Dense, InputLayer
from keras.models import Sequential

from tqdm import tqdm_notebook

from sklearn.model_selection import train_test_split

In [3]:
# create random number generator
seed = 42
rng = np.random.RandomState(seed)

In [4]:
TRAIN_DATA_PATH = "train_nLPp5K8"
train_data = pd.read_csv(TRAIN_DATA_PATH+'/train.csv')
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12196 entries, 0 to 12195
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   image_names  12196 non-null  object
 1   class        12196 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 190.7+ KB


In [5]:
train_data['class'].value_counts()

1    6174
0    6022
Name: class, dtype: int64

In [6]:
len(os.listdir(TRAIN_DATA_PATH+'/images'))
17543

17543

In [7]:
# load images
X = []
not_found = []
num_images_read = 2500
for img_name in tqdm_notebook(train_data.image_names[:num_images_read]):
#for img_name in tqdm_notebook(train_data.image_names):
  img = plt.imread(TRAIN_DATA_PATH+'/images/'+img_name)
  X.append(img)

X = np.array(X)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for img_name in tqdm_notebook(train_data.image_names[:num_images_read]):


  0%|          | 0/2500 [00:00<?, ?it/s]

In [8]:
X.shape

(2500, 224, 224, 3)

In [9]:
# preprocessing
# converting into 1-d array cause MLP works with 1-d arrays
#X = X.reshape(X.reshape[0], 224*224*3)
#X.shape

In [10]:
X.min(), X.max()

(0, 255)

In [11]:
# normalizar pixeles
X = X / X.max()

X.min(), X.max()

(0.0, 1.0)

In [12]:
# split dataset
y = train_data['class'].values[:num_images_read]
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed
)

In [13]:
# Model architecture
from tensorflow.keras.preprocessing.image import ImageGenerator
from keras.layers import (
    InputLayer,
    Dense,
    Dropout,
    BatchNormalization,
    Flatten
    )
from keras.initializers import glorot_normal
from keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint
    )
from keras.models import Sequential
from keras.optimizers import Adam

ImportError: cannot import name 'ImageGenerator' from 'tensorflow.keras.preprocessing.image' (/opt/homebrew/Caskroom/miniforge/base/envs/tensorflow/lib/python3.8/site-packages/keras/api/_v2/keras/preprocessing/image/__init__.py)

In [None]:
image_augmentation = ImageGenerator(
    rotation_image=45,
    width_shift_range=40,
    height_shift_range=40,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)
image_augmentation.fit(X_train)

In [None]:
model = Sequential()
model.add(InputLayer(input_shape=(224, 224, 3)))
model.add(Flatten())
model.add(Dense(units=100, activation='relu', kernel_initializer=glorot_normal(seed=seed)))
model.add(BatchNormalization())
model.add(Dropout(rate=0.5))
model.add(Dense(units=100, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(rate=0.5))
model.add(Dense(units=1, activation='sigmoid'))


model.summary()

In [None]:
# define earlystopping
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.01,
    patience=5,
    mode='min'
)
# optimizer
adam = Adam(lr=1e-5, clipvalue=1)

# model checkpointing
filepath = TRAIN_DATA_PATH+'/best_weights.hdf5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', mode='min')
callbacks_list = [early_stopping, checkpoint]


In [None]:
model.compile(
    loss='binary_cross_entropy',
    optimizer=adam,
    metrics=['accuracy'],
    callbacks=[early_stopping]
)

In [None]:
model_history = model.fit_generator(
    image_augmentation.flow(X_train, y_train, batch_size=128),
    validation_data=(X_valid, y_valid),
    epochs=50,
    callbacks=callbacks_list
)

In [None]:
# summarize history for loss
plt.plot(model_history.history['loss'])
plt.plot(model_history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'],loc='upper left')
plt.show()