In [1]:
import os
import numpy as np
import tensorflow as tf
from keras.models import load_model
from keras.applications.mobilenet import preprocess_input
from keras.preprocessing.image import (
    ImageDataGenerator,
    img_to_array,
    load_img
)
from keras import metrics

2022-07-15 20:04:08.996616: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-07-15 20:04:08.996703: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
batch_size = 30
dimension1 = dimension2 = 224
test_dir = '/home/htriedman/nsfw_data_scraper/data/test'
model = '/home/htriedman/Image-Content-Filtration/trained/hal-retraining.h5'

In [3]:
def prepare_image(img):
    # Converts image to RGB,
    # resizes to 224X224 and
    # reshapes it for the MobileNet V1 Model
    if img.mode != "RGB":
        img = img.convert("RGB")
    img = img.resize((224, 224))
    img = img_to_array(img)
    img = img.reshape((1, img.shape[0], img.shape[1], 3))
    img = preprocess_input(img)
    return img

In [4]:
test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(dimension1, dimension2),
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

Found 2333 images belonging to 2 classes.


# Model 1

In [5]:
model = load_model(model)
model.compile(
    metrics=[
        metrics.CategoricalAccuracy(),
        metrics.AUC(),
        metrics.CategoricalCrossentropy()
            ]
)

2022-07-15 20:04:30.954834: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-07-15 20:04:30.955355: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-07-15 20:04:30.955377: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2022-07-15 20:04:30.955411: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (stat1005): /proc/driver/nvidia/version does not exist
2022-07-15 20:04:30.955737: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the approp

In [6]:
out = model.evaluate(test_generator)
print(f"overall loss: {out[0]}")
print(f"overall accuracy: {out[1]:.4f}")
print(f"overall AUC: {out[2]:.4f}")
print(f"overall categorical crossentropy: {out[3]}")

2022-07-15 20:04:39.168214: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-07-15 20:04:39.192813: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2400045000 Hz


overall loss: 0.0
overall accuracy: 0.9546
overall AUC: 0.9748
overall categorical crossentropy: 0.3254348635673523


In [7]:
def get_imgs(file_dir, output_class):
    imgs = []
    for img in os.listdir(f"{file_dir}/{output_class}"):
        img = os.path.join(f"{file_dir}/{output_class}", img)
        img = load_img(img, target_size=(dimension1, dimension2))
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        imgs.append(img)
    return np.vstack(imgs)

def class_metrics(file_dir, output_class, class_position):
    cl = get_imgs(file_dir, output_class)
    preds = model.predict(cl, batch_size=batch_size)
    output = {}
    for thresh in np.linspace(0.1, 0.9, num=9):
        preds_rounded = [1 if i[class_position] > thresh else 0 for i in preds]
        accuracy = sum(preds_rounded) / len(preds_rounded)
        output[f"accuracy_thresh_{round(thresh, 1)}"] = accuracy
        print(f"{output_class} accuracy with threshold {round(thresh, 1)}: {accuracy:.4f}")
    return output

In [9]:
_ = class_metrics(test_dir, "nsfw", 0)

nsfw accuracy with threshold 0.1: 0.9055
nsfw accuracy with threshold 0.2: 0.8976
nsfw accuracy with threshold 0.3: 0.8945
nsfw accuracy with threshold 0.4: 0.8898
nsfw accuracy with threshold 0.5: 0.8850
nsfw accuracy with threshold 0.6: 0.8772
nsfw accuracy with threshold 0.7: 0.8724
nsfw accuracy with threshold 0.8: 0.8661
nsfw accuracy with threshold 0.9: 0.8551


In [10]:
_ = class_metrics(test_dir, "sfw", 1)

sfw accuracy with threshold 0.1: 0.9918
sfw accuracy with threshold 0.2: 0.9900
sfw accuracy with threshold 0.3: 0.9882
sfw accuracy with threshold 0.4: 0.9847
sfw accuracy with threshold 0.5: 0.9806
sfw accuracy with threshold 0.6: 0.9782
sfw accuracy with threshold 0.7: 0.9759
sfw accuracy with threshold 0.8: 0.9723
sfw accuracy with threshold 0.9: 0.9647


In [11]:
nsfw = get_imgs(test_dir, 'nsfw')
sfw = get_imgs(test_dir, 'sfw')
nsfw_preds = model.predict(nsfw, batch_size=batch_size)
sfw_preds = model.predict(sfw, batch_size=batch_size)

In [12]:
# set threshold at which something is considered "nsfw"
thresh = 0.1

# cast to binary preds
nsfw_preds_rounded = [1 if i[0] > thresh else 0 for i in nsfw_preds]
sfw_preds_rounded = [1 if i[0] > thresh else 0 for i in sfw_preds]

# calculate metrics
tp = sum(nsfw_preds_rounded)
tn = len(sfw_preds_rounded) - sum(sfw_preds_rounded)
fp = sum(sfw_preds_rounded)
fn = len(nsfw_preds_rounded) - sum(nsfw_preds_rounded)

acc = (tp + tn) / (tp + tn + fp + fn)
fpr = fp / (fp + tn)
fnr = fn / (fn + tp)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)

# print results
print(f"with threshold {thresh}:")
print(f"accuracy:\t{acc:.4f}")
print(f"\nconfusion matrix:\ntp:\t{tp}\tfn:\t{fn}\nfp:\t{fp}\ttn:\t{tn}\n")
print(f"fpr:\t\t{fpr:.4f}")
print(f"fnr:\t\t{fnr:.4f}")
print(f"precision:\t{precision:.4f}")
print(f"recall:\t\t{recall:.4f}")
print(f"f1:\t\t{f1:.4f}")

with threshold 0.1:
accuracy:	0.9486

confusion matrix:
tp:	575	fn:	60
fp:	60	tn:	1638

fpr:		0.0353
fnr:		0.0945
precision:	0.9055
recall:		0.9055
f1:		0.9055


# Model 2

In [13]:
model = '/home/htriedman/Image-Content-Filtration/trained/hal-retraining_run_2.h5'

In [14]:
model = load_model(model)
model.compile(
    metrics=[
        metrics.CategoricalAccuracy(),
        metrics.AUC(),
        metrics.CategoricalCrossentropy()
            ]
)

In [15]:
out = model.evaluate(test_generator)
print(f"overall loss: {out[0]}")
print(f"overall accuracy: {out[1]:.4f}")
print(f"overall AUC: {out[2]:.4f}")
print(f"overall categorical crossentropy: {out[3]}")

overall loss: 0.0
overall accuracy: 0.9623
overall AUC: 0.9774
overall categorical crossentropy: 0.4287799000740051


In [16]:
_ = class_metrics(test_dir, "nsfw", 0)

nsfw accuracy with threshold 0.1: 0.9087
nsfw accuracy with threshold 0.2: 0.9071
nsfw accuracy with threshold 0.3: 0.9008
nsfw accuracy with threshold 0.4: 0.8929
nsfw accuracy with threshold 0.5: 0.8850
nsfw accuracy with threshold 0.6: 0.8835
nsfw accuracy with threshold 0.7: 0.8787
nsfw accuracy with threshold 0.8: 0.8772
nsfw accuracy with threshold 0.9: 0.8646


In [17]:
_ = class_metrics(test_dir, "sfw", 1)

sfw accuracy with threshold 0.1: 0.9947
sfw accuracy with threshold 0.2: 0.9918
sfw accuracy with threshold 0.3: 0.9912
sfw accuracy with threshold 0.4: 0.9912
sfw accuracy with threshold 0.5: 0.9912
sfw accuracy with threshold 0.6: 0.9870
sfw accuracy with threshold 0.7: 0.9847
sfw accuracy with threshold 0.8: 0.9806
sfw accuracy with threshold 0.9: 0.9741


In [18]:
nsfw = get_imgs(test_dir, 'nsfw')
sfw = get_imgs(test_dir, 'sfw')
nsfw_preds = model.predict(nsfw, batch_size=batch_size)
sfw_preds = model.predict(sfw, batch_size=batch_size)

In [19]:
# set threshold at which something is considered "nsfw"
thresh = 0.1

# cast to binary preds
nsfw_preds_rounded = [1 if i[0] > thresh else 0 for i in nsfw_preds]
sfw_preds_rounded = [1 if i[0] > thresh else 0 for i in sfw_preds]

# calculate metrics
tp = sum(nsfw_preds_rounded)
tn = len(sfw_preds_rounded) - sum(sfw_preds_rounded)
fp = sum(sfw_preds_rounded)
fn = len(nsfw_preds_rounded) - sum(nsfw_preds_rounded)

acc = (tp + tn) / (tp + tn + fp + fn)
fpr = fp / (fp + tn)
fnr = fn / (fn + tp)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)

# print results
print(f"with threshold {thresh}:")
print(f"accuracy:\t{acc:.4f}")
print(f"\nconfusion matrix:\ntp:\t{tp}\tfn:\t{fn}\nfp:\t{fp}\ttn:\t{tn}\n")
print(f"fpr:\t\t{fpr:.4f}")
print(f"fnr:\t\t{fnr:.4f}")
print(f"precision:\t{precision:.4f}")
print(f"recall:\t\t{recall:.4f}")
print(f"f1:\t\t{f1:.4f}")

with threshold 0.1:
accuracy:	0.9563

confusion matrix:
tp:	577	fn:	58
fp:	44	tn:	1654

fpr:		0.0259
fnr:		0.0913
precision:	0.9291
recall:		0.9087
f1:		0.9188


In [22]:
print("false negatives (classified sfw, actually nsfw)")
for i, f in enumerate(os.listdir("/home/htriedman/nsfw_data_scraper/data/test/nsfw")):
    if nsfw_preds_rounded[i] == 0:
        print(i, f, round(nsfw_preds[i][0], 3))
        
print("\nfalse positives (classified nsfw, actually sfw)")
for i, f in enumerate(os.listdir("/home/htriedman/nsfw_data_scraper/data/test/sfw")):
    if sfw_preds_rounded[i] == 1:
        print(i, f, round(sfw_preds[i][0], 3))

false negatives (classified sfw, actually nsfw)
38 08d1c166db27873f31ab0d75544beb268de70dd784df047a31480aa284871af0.jpg 0.0
59 62b5cea8072b156a5573b40067c5bce986e385cc43b4021018c8d4585ec78ed8.jpg 0.007
62 8db349406f4f3f99da8a239205d426a998a880b605d2281f470687ecc2d7c513.jpg 0.0
65 f8c2064755efd025b56af03b918687c62c2b1f387f92521e6a820bf7c9621a9c.jpg 0.0
69 f43dc1708304602da7d5c6a2f49fdddec41e53cf38f0a6eeb215b6e1f244667c.jpg 0.0
80 b506624cb34b7468fcbe96444851156b00153adc55ff9dcc4b5d11938725a9d2.jpg 0.0
86 f58c1ca866cee8b52c8745d447878a04a74f2f5f0c738d48c1ac7d89b9620616.jpg 0.0
94 0e80bb01ec5aa85580348a102bbc1f2c518d3a1635ad9d32a1aaad4ee3776287.jpg 0.004
104 e9eccc1af946ae15b75f3422faa97ed1ca4bf27034227e752a3108bb296e7c63.jpg 0.0
106 db78c5d7883c4882e60434b94bd17e1b4b64c04624ab80c38721be57911a5107.jpg 0.0
110 20fc63f1177e099960b15761e53acd8c0bcdea901d1f49706f86e5dee3d5a532.jpg 0.07
114 cb27189a1ef4ba2779b2f4ed005cd428e6767151ec0b565b8d4ffe7e912f09d1.jpg 0.0
125 49566f9a7f1fc9eafe3fd26fa24