# Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
import cv2
import os
import glob
import h5py
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, accuracy_score

import functions as fn

import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger

# Single random seed reused by the dataset split and the sklearn split below.
seed = 42



I highly recommend reviewing the `functions.py` file, which contains the essential functions used to preprocess the data and apply the artificial rolling-shutter effect.

# Data synthesis

In [None]:
# --- Source (COCO) locations -------------------------------------------
dataDir = 'data'
trainDir = 'train2017'
valDir = 'val2017'

# --- Destination layout: <destDataDir>/<split>/<class>/ ------------------
destDataDir = 'created_data'
destTrainDir = 'train'
destTestDir = 'test'
destValDir = 'val'
destNormDir = 'NORMAL'
destRSDir = 'ROLLING_SHUTTER'

# --- Synthesis parameters ------------------------------------------------
seed = 42
transformed = 0.5
train = 0.8       # fraction of generated images written to the train split
test = 1 - train  # remainder goes to the test split
strategy_list = ['left', 'right']  # strategies passed to the fn.* helpers

# Seed NumPy's global RNG so the random image/annotation picks are repeatable.
np.random.seed(seed)

def _load_person_subset(ann_path):
    """Open a COCO annotation file and collect the ids tied to 'person'.

    Returns a tuple ``(coco, cat_ids, img_ids, ann_ids)`` built with the same
    sequence of pycocotools queries for whichever split is passed in.
    """
    coco = COCO(ann_path)
    cat_ids = coco.getCatIds(catNms=['person'])
    # The image ids are passed through getImgIds a second time on purpose:
    # the result of that second call is what the rest of the notebook uses.
    img_ids = coco.getImgIds(imgIds=coco.getImgIds(catIds=cat_ids))
    ann_ids = coco.getAnnIds(imgIds=img_ids, catIds=cat_ids, iscrowd=None)
    return coco, cat_ids, img_ids, ann_ids


trainAnn = f'{dataDir}/annotations/instances_{trainDir}.json'
valAnn = f'{dataDir}/annotations/instances_{valDir}.json'

cocoTrain, catIdsTrain, imgIdsTrain, annIdsTrain = _load_person_subset(trainAnn)
cocoVal, catIdsVal, imgIdsVal, annIdsVal = _load_person_subset(valAnn)

Note: depending on your computer, the code below can take anywhere from 30 minutes to a few hours to synthesize the data.

In [None]:
# Re-seed so this cell produces the same picks no matter what ran before it.
np.random.seed(seed)

prog = 0   # fraction of the current pass completed (0..1)
iters = 0  # running index used in the output file names
limit = 5000 # number of images to process for both RS and normal; results in around double the amount (limit of 200 = ~400 total images)

# Image ids to exclude from the rolling-shutter pass.
ids_ignore = []

# Clears destination folders.
# Paths are assembled with os.path.join so the clean-up works on every OS
# (hard-coded backslashes only match on Windows). Each folder is also created
# when missing, so the image writes further down have somewhere to land.
destDir = os.path.join(os.getcwd(), os.path.normpath(destDataDir))
for traintestval in [destTrainDir, destTestDir, destValDir]:
    for normrs in [destNormDir, destRSDir]:
        out_dir = os.path.join(destDir, traintestval, normrs)
        os.makedirs(out_dir, exist_ok=True)
        for f in glob.glob(os.path.join(out_dir, '*')):
            # Only plain files are removed; os.remove cannot delete folders.
            if os.path.isfile(f):
                os.remove(f)

### ROLLING SHUTTER

# Draw `limit` image ids (with replacement) and drop any that are listed in
# ids_ignore. One np.random.choice call per draw keeps the random sequence
# tied to the seed set above.
ids_list = []
for _ in range(limit):
    img_id = np.random.choice(imgIdsTrain)
    if int(img_id) not in ids_ignore:
        ids_list.append(img_id)

for img_id in ids_list: # rolling shutter
    print(f'Loading image id: {img_id}')

    try:
        imgDict = cocoTrain.loadImgs([img_id])[0]
        annDict = cocoTrain.loadAnns(cocoTrain.getAnnIds(imgIds=imgDict['id'], catIds=catIdsTrain, iscrowd=None))

        fpath = '{}/{}/{}'.format(dataDir, trainDir, imgDict['file_name'])
        img = cv2.imread(fpath)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'could not read image: {fpath}')
        # Return value is not used; presumably this edits `img` in place —
        # TODO confirm in functions.py.
        fn.remove_true_blacks(img)

        # Pick one 'person' annotation and one strategy at random.
        ann = annDict[np.random.randint(0, len(annDict))]
        strategy = strategy_list[np.random.randint(0, len(strategy_list))]

        masked, maskedInv = fn.generate_masked_images(img, ann)

        masked_rs = fn.apply_rolling_shutter(masked, ann, intensity=.7, strategy=strategy)

        maskedInv_filled = fn.fill_inv_masked(maskedInv, ann, strategy=strategy)

        final = fn.recombine_masked_imgs(masked_rs, maskedInv_filled)

        # The first `train` fraction of the pass goes to train, the rest to test.
        split_dir = destTrainDir if prog < train else destTestDir
        fpath = '{}/{}/{}/{}'.format(destDataDir, split_dir, destRSDir, f'rs-{iters}.png')
        if not cv2.imwrite(fpath, final):
            raise OSError(f'could not write image: {fpath}')
    except Exception as err:
        # Best-effort: one bad image must not stop the run. Catching Exception
        # (not a bare except) keeps Ctrl+C / kernel interrupt working.
        print(f'Something went wrong, continuing to next image ({type(err).__name__}: {err})')

    # Progress and file index advance even for skipped images.
    prog += 1 / len(ids_list)
    iters += 1
    print(f'Finished! {50*prog:.2f}% done')

### NORMAL

prog = 0
iters = 0
# Draw `limit` image ids (with replacement) for the unmodified class.
ids_list = [np.random.choice(imgIdsTrain) for _ in range(limit)]

for img_id in ids_list:
    print(f'Loading image id: {img_id}')

    try:
        imgDict = cocoTrain.loadImgs([img_id])[0]

        fpath = '{}/{}/{}'.format(dataDir, trainDir, imgDict['file_name'])
        img = cv2.imread(fpath)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f'could not read image: {fpath}')
        # Return value is not used; presumably this edits `img` in place —
        # TODO confirm in functions.py.
        fn.remove_true_blacks(img)

        # The first `train` fraction of the pass goes to train, the rest to test.
        split_dir = destTrainDir if prog < train else destTestDir
        fpath = '{}/{}/{}/{}'.format(destDataDir, split_dir, destNormDir, f'rs-{iters}.png')
        if not cv2.imwrite(fpath, img):
            raise OSError(f'could not write image: {fpath}')
    except Exception as err:
        # Same best-effort policy as the rolling-shutter pass: report the
        # failure and move on instead of aborting the whole run.
        print(f'Something went wrong, continuing to next image ({type(err).__name__}: {err})')

    # Progress and file index advance even for skipped images.
    prog += 1 / len(ids_list)
    iters += 1
    print(f'Finished loading! {50 + 50*prog:.2f}% done')


# Modeling

## MobileNetV3Large

In [None]:
# Keyword arguments shared by the training and validation subsets. Both calls
# must use the same seed and validation_split so the two subsets come from
# one consistent partition of 'created_data/train'.
_split_kwargs = dict(
    labels='inferred',
    validation_split=.2,
    seed=seed,
    shuffle=True,
)

train_data = keras.preprocessing.image_dataset_from_directory(
    'created_data/train', subset="training", **_split_kwargs)

val_data = keras.preprocessing.image_dataset_from_directory(
    'created_data/train', subset="validation", **_split_kwargs)

# Held-out test images; read in directory order (no shuffling).
test_data = keras.preprocessing.image_dataset_from_directory(
    'created_data/test', labels='inferred', shuffle=False)

In [None]:
def preprocess(image, label):
    """Resize `image` to 512x512 and apply MobileNetV3 input preprocessing.

    The label is passed through untouched, so the function can be mapped
    directly over a dataset of (image, label) pairs.
    """
    target_size = [512, 512]
    scaled = tf.image.resize(image, target_size)
    return keras.applications.mobilenet_v3.preprocess_input(scaled), label

def recall_m(y_true, y_pred):
    """Recall over the given tensors: true positives / actual positives.

    Values are clipped to [0, 1] and rounded before counting, and K.epsilon()
    is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    n_hits = K.sum(hit_mask)
    n_actual = K.sum(actual_mask)
    return n_hits / (n_actual + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision over the given tensors: true positives / predicted positives.

    Values are clipped to [0, 1] and rounded before counting, and K.epsilon()
    is added to the denominator to avoid division by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    n_hits = K.sum(hit_mask)
    n_predicted = K.sum(predicted_mask)
    return n_hits / (n_predicted + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of precision_m and recall_m.

    K.epsilon() in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
# Run every split through `preprocess` and prefetch one element ahead.
train_data, val_data, test_data = (
    split.map(preprocess).prefetch(1)
    for split in (train_data, val_data, test_data)
)

Note: the code below will take multiple hours to run, even on the highest-end computers.

In [None]:
# ImageNet-pretrained MobileNetV3Large without its classification head.
base_model_mobilenetv3 = keras.applications.MobileNetV3Large(weights = 'imagenet', include_top = False)

model_dir = '../models'
model_uuid = 'model_MobileNetV3_v1'

# Make sure the output folder exists before the callbacks write the CSV log
# and the checkpoints into it.
os.makedirs(model_dir, exist_ok=True)

# Freeze the backbone: only the new pooling + dense head is trained.
for layer in base_model_mobilenetv3.layers:
    layer.trainable = False

# Binary head: global average pool followed by one sigmoid unit.
avg = keras.layers.GlobalAveragePooling2D()(base_model_mobilenetv3.output)
output = keras.layers.Dense(1, activation = 'sigmoid')(avg)
model_mobilenetv3 = keras.Model(inputs = base_model_mobilenetv3.input, outputs = output)

early_stopping = EarlyStopping(monitor='val_loss', verbose=2, patience=10, min_delta=.00250)
# One checkpoint per epoch; the doubled braces leave '{epoch:08d}' in the
# file name for Keras to fill in. The deprecated `period=1` argument is
# dropped because saving every epoch is already the default.
model_checkpoint = ModelCheckpoint(f'{model_dir}/{model_uuid}_weights{{epoch:08d}}.h5', verbose = 2, save_best_only=False)
csv_logger = CSVLogger(f'{model_dir}/{model_uuid}.csv', separator = ',', append = True)

# NOTE(review): the `decay` argument is only accepted by the older/legacy
# Keras optimizers; on newer releases express it as a learning-rate schedule
# instead — confirm against the installed version.
optimizer = keras.optimizers.SGD(learning_rate = 0.2, momentum = 0.9, decay = 0.01)
model_mobilenetv3.compile(loss = 'binary_crossentropy', optimizer = optimizer,  metrics = ['accuracy', recall_m, precision_m, f1_m])

# Model.fit accepts the datasets directly; fit_generator is deprecated.
# epochs=1000 is only an upper bound: early_stopping ends training once
# val_loss stops improving.
results = model_mobilenetv3.fit(train_data,
    epochs=1000,
    validation_data=val_data,
    callbacks=[early_stopping, model_checkpoint, csv_logger])

## Logistic regression & random forest

Unfortunately, for these `sklearn` models I had to convert all the images to `numpy` arrays, meaning that every image has to be held in memory at once. Computers with little available memory may struggle to run the following code.

In [None]:
# NOTE(review): the synthesis and MobileNet cells use 'created_data/...'
# relative to the working directory, while this cell reads
# '../created_data/...' — confirm which location is intended.
train_imgs = keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory('../created_data/train', batch_size=8000)
test_imgs = keras.preprocessing.image.ImageDataGenerator(rescale=1./255).flow_from_directory('../created_data/test', batch_size=2000)

# Only the first batch of each generator is pulled, so the batch sizes above
# cap how many images end up in memory (at most 8000 train / 2000 test).
X_i, y_i = next(train_imgs)
X_test, y_test = next(test_imgs)
X_train, X_val, y_train, y_val = train_test_split(X_i, y_i, train_size = 0.75, random_state = seed)

# Flatten every image into one feature row. The row count comes from the
# arrays themselves, so this keeps working when the number of successfully
# synthesized images differs from run to run.
X_train = X_train.reshape(len(X_train), -1)
X_val = X_val.reshape(len(X_val), -1)
X_test = X_test.reshape(len(X_test), -1)

# Keep column 1 of the label arrays as the binary target — presumably the
# one-hot column for ROLLING_SHUTTER (classes sorted alphabetically); confirm
# with train_imgs.class_indices.
y_train = y_train[:,1]
y_val = y_val[:,1]
y_test = y_test[:,1]

In [None]:
# Baseline 1: logistic regression on the flattened pixels (default settings).
lr = LogisticRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# (precision, accuracy) on the held-out test set; shown as the cell output.
(precision_score(y_test, lr_pred), accuracy_score(y_test, lr_pred))

In [None]:
# Baseline 2: random forest on the flattened pixels. The forest is seeded
# like the rest of the notebook so the reported scores are reproducible.
rf = RandomForestClassifier(random_state=seed)
rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)

# (precision, accuracy) on the held-out test set; shown as the cell output.
precision_score(y_test, rf_pred), accuracy_score(y_test, rf_pred)