In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import cv2
import os
import glob
import h5py
import sklearn

import functions as fn

import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger

seed=42



In [2]:
physical_devices = tf.config.list_physical_devices()
for dev in physical_devices:
  print(dev)

PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')


In [3]:
tf.config.list_physical_devices('GPU')

[]

In [4]:
train_data = keras.preprocessing.image_dataset_from_directory(
    '../created_data/train', 
    labels='inferred',
    subset="training",
    validation_split=.2,
    seed=seed,
    #color_mode='grayscale',
    shuffle=True)

val_data = keras.preprocessing.image_dataset_from_directory(
    '../created_data/train', 
    labels='inferred',
    subset="validation",
    validation_split=.2,
    seed=seed,
    #color_mode='grayscale',
    shuffle=True)

test_data = keras.preprocessing.image_dataset_from_directory(
    '../created_data/test', 
    labels='inferred',
    #color_mode='grayscale',
    shuffle=False,)

Found 7936 files belonging to 2 classes.
Using 6349 files for training.
Found 7936 files belonging to 2 classes.
Using 1587 files for validation.
Found 1974 files belonging to 2 classes.


In [5]:
def preprocess(image, label):
    resized_image = tf.image.resize(image, [512,512])
    final_image = keras.applications.mobilenet_v3.preprocess_input(resized_image)
    return final_image, label

In [6]:
train_data = train_data.map(preprocess).prefetch(1)
val_data = val_data.map(preprocess).prefetch(1)
test_data = test_data.map(preprocess).prefetch(1)

In [7]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [17]:
base_model_mobilenetv3 = keras.applications.MobileNetV3Large(weights = 'imagenet', include_top = False)

model_dir = '../models'
model_uuid = 'model_MobileNetV3_v1'

for layer in base_model_mobilenetv3.layers:
    layer.trainable = False

avg = keras.layers.GlobalAveragePooling2D()(base_model_mobilenetv3.output)
output = keras.layers.Dense(1, activation = 'sigmoid')(avg)
model_mobilenetv3 = keras.Model(inputs = base_model_mobilenetv3.input, outputs = output)

early_stopping = EarlyStopping(monitor='val_loss', verbose=2, patience=10, min_delta=.00250)
model_checkpoint = ModelCheckpoint(f'{model_dir}/{model_uuid}_weights{{epoch:08d}}.h5', verbose = 2, save_best_only=False, period=1)
csv_logger = CSVLogger(f'{model_dir}/{model_uuid}.csv', separator = ',', append = True)

optimizer = keras.optimizers.SGD(learning_rate = 0.2, momentum = 0.9, decay = 0.01)
model_mobilenetv3.compile(loss = 'binary_crossentropy', optimizer = optimizer,  metrics = ['accuracy', recall_m, precision_m, f1_m])

results = model_mobilenetv3.fit_generator(train_data,
    epochs=1000,
    validation_data=val_data,
    callbacks=[early_stopping, model_checkpoint, csv_logger])

Epoch 1/1000


  results = model_mobilenetv3.fit_generator(train_data,


Epoch 1: saving model to ../models\model_MobileNetV3_v1_weights00000001.h5
Epoch 2/1000
Epoch 2: saving model to ../models\model_MobileNetV3_v1_weights00000002.h5
Epoch 3/1000
Epoch 3: saving model to ../models\model_MobileNetV3_v1_weights00000003.h5
Epoch 4/1000
Epoch 4: saving model to ../models\model_MobileNetV3_v1_weights00000004.h5
Epoch 5/1000
Epoch 5: saving model to ../models\model_MobileNetV3_v1_weights00000005.h5
Epoch 6/1000
Epoch 6: saving model to ../models\model_MobileNetV3_v1_weights00000006.h5
Epoch 7/1000
Epoch 7: saving model to ../models\model_MobileNetV3_v1_weights00000007.h5
Epoch 8/1000
Epoch 8: saving model to ../models\model_MobileNetV3_v1_weights00000008.h5
Epoch 9/1000
Epoch 9: saving model to ../models\model_MobileNetV3_v1_weights00000009.h5
Epoch 10/1000
Epoch 10: saving model to ../models\model_MobileNetV3_v1_weights00000010.h5
Epoch 11/1000
Epoch 11: saving model to ../models\model_MobileNetV3_v1_weights00000011.h5
Epoch 12/1000
Epoch 12: saving model to .

## Logistic regression & RF

In [9]:
train_imgs = keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory('../created_data/train', batch_size=8000)
test_imgs = keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory('../created_data/test', batch_size=2000)

Found 7936 images belonging to 2 classes.
Found 1974 images belonging to 2 classes.


In [11]:
from sklearn.model_selection import train_test_split

X_i, y_i = next(train_imgs)
X_test, y_test = next(test_imgs)
X_train, X_val, y_train, y_val = train_test_split(X_i, y_i, train_size = 0.75, random_state = seed)

In [15]:
print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(5952, 256, 256, 3)
(1984, 256, 256, 3)
(1974, 256, 256, 3)


In [16]:
X_train = X_train.reshape(5952, -1)
X_val = X_val.reshape(1984, -1)
X_test = X_test.reshape(1974, -1)

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

(5952, 196608)
(1984, 196608)
(1974, 196608)


In [17]:
y_train = y_train[:,1]
y_val = y_val[:,1]
y_test = y_test[:,1]

print(y_train.shape)
print(y_val.shape)
print(y_test.shape)

(5952,)
(1984,)
(1974,)


In [18]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
from sklearn.metrics import precision_score, accuracy_score

lr_pred = lr.predict(X_test)

precision_score(y_test, lr_pred), accuracy_score(y_test, lr_pred)

(0.5146316851664985, 0.5207700101317123)

In [22]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(X_train, y_train)

In [24]:
rf_pred = rf.predict(X_test)

precision_score(y_test, rf_pred), accuracy_score(y_test, rf_pred)

(0.4727694090382387, 0.48226950354609927)