In [1]:
# Import relevant modules
# ImagePrediction is a helper module (helpers/image_models) that assists with setting up the image recognition models
# and getting a prediction.
import cv2
import numpy as np
import random
from PIL import Image
from matplotlib import pyplot as plt
from helpers.image_models import ImagePrediction
# Ground truth helper so we can see the ground truth of an Imagenet image.
from validate_ground_truth.imagenet_ground_truth import ImagenetGroundTruth
# Instantiates the ImagePrediction object with the vgg16 model.
# NOTE: the recorded output of this cell shows the VGG16 weights being downloaded,
# so the first run presumably needs network access.
predictor = ImagePrediction(model_name="vgg16")
# Instantiates the ImagenetGroundTruth object, used later to check a predicted
# class id against the ground truth of an ImageNet validation image.
ground_truth = ImagenetGroundTruth()

0 Physical GPUs, 0 Logical GPUs
Version:  2.15.0
Eager mode:  True
GPU is NOT AVAILABLE
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [2]:
# Use the predictor to get the prediction fitness for the llama image.
# The result printed below is a list of (class id, label, score) tuples, highest score first.
print(predictor.get_prediction("./random_images/llama.jpg"))

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
[('n02437616', 'llama', 0.9984397), ('n02417914', 'ibex', 0.00045074782), ('n02412080', 'ram', 0.0003347307), ('n01518878', 'ostrich', 0.00028087143), ('n02423022', 'gazelle', 0.00020455045), ('n02415577', 'bighorn', 0.00011685205), ('n02326432', 'hare', 9.440649e-05), ('n01806143', 'peacock', 1.0596843e-05), ('n02391049', 'zebra', 9.451463e-06), ('n01877812', 'wallaby', 5.0913545e-06)]


In [3]:
# Use the predictor to get the prediction fitness for the truck image.
# This is the baseline that the modified truck images further down are compared against.
print(predictor.get_prediction("./random_images/truck.jpg"))

[('n04467665', 'trailer_truck', 0.6130443), ('n03796401', 'moving_van', 0.18597458), ('n03417042', 'garbage_truck', 0.17115846), ('n04065272', 'recreational_vehicle', 0.01292002), ('n04461696', 'tow_truck', 0.004495148), ('n03776460', 'mobile_home', 0.0028719658), ('n03345487', 'fire_engine', 0.0025651865), ('n03769881', 'minibus', 0.0014597897), ('n02701002', 'ambulance', 0.001394818), ('n03384352', 'forklift', 0.0006029678)]


In [4]:
# Gets the prediction for the ImageNet image 'grey_fox' and checks the top-1 predicted
# class id (prediction[0][0]) against the ground truth for this image.
# NOTE(review): validate_ground_truth presumably returns True when they match - confirm in the helper.
prediction = predictor.get_prediction("./imagenet_images/ILSVRC2012_val_00000323.JPEG")
print(prediction)
print(ground_truth.validate_ground_truth("imagenet_images/ILSVRC2012_val_00000323.JPEG", prediction[0][0]))

[('n02120505', 'grey_fox', 0.71067125), ('n02114855', 'coyote', 0.21894883), ('n02119789', 'kit_fox', 0.035443097), ('n02119022', 'red_fox', 0.016821338), ('n02114712', 'red_wolf', 0.006262531), ('n01877812', 'wallaby', 0.0045209085), ('n02326432', 'hare', 0.0021693334), ('n02325366', 'wood_rabbit', 0.0020182133), ('n02114367', 'timber_wolf', 0.0008821066), ('n02127052', 'lynx', 0.0007048801)]
True


In [5]:
# Gets the fitness prediction for the arctic fox image.
# The result is kept in `arctic_fox` so it can be compared with the sepia version later.
arctic_fox = predictor.get_prediction("./random_images/arctic-fox.jpg")
print(arctic_fox)

[('n02120079', 'Arctic_fox', 0.9021169), ('n02134084', 'ice_bear', 0.0374206), ('n02114548', 'white_wolf', 0.02616428), ('n02441942', 'weasel', 0.025217723), ('n02326432', 'hare', 0.0026365125), ('n02111889', 'Samoyed', 0.0025606796), ('n02109961', 'Eskimo_dog', 0.0014540001), ('n02110185', 'Siberian_husky', 0.0006840612), ('n02442845', 'mink', 0.00034034983), ('n02114367', 'timber_wolf', 0.0002977586)]


In [6]:
# Gets the fitness prediction for the arctic fox sepia image.
# This is the same image as the arctic fox, but modified to have a sepia colour.
arctic_fox_sepia = predictor.get_prediction("./random_images/arctic-fox-sepia.jpg")
print(arctic_fox_sepia)

[('n02120079', 'Arctic_fox', 0.80227584), ('n02114548', 'white_wolf', 0.07066281), ('n02134084', 'ice_bear', 0.044756357), ('n02441942', 'weasel', 0.026944483), ('n02111889', 'Samoyed', 0.022159975), ('n02326432', 'hare', 0.008744982), ('n02442845', 'mink', 0.0021549524), ('n02109961', 'Eskimo_dog', 0.0014565279), ('n01877812', 'wallaby', 0.0013594517), ('n03794056', 'mousetrap', 0.0013465774)]


In [7]:
# Gives us the amount by which the top-1 fitness has decreased between the original and sepia image.
# We can see that even a very small change in colour to the image already reduces the fitness.
print(arctic_fox[0][2] - arctic_fox_sepia[0][2])

0.09984106


In [15]:
# Your task now is to expand on this experiment by manually making changes to the provided images as well as any other images you can find.
# Start by running the image through the predictor to get a baseline fitness.
# Then copy and modify the image to see what effect this has on the fitness.
# Are there any situations where you can flip the top-1 prediction by making these changes to the image?
# (NOTE - This may be easier with an image that already has a low prediction fitness to begin with)


def random_black_pixels(image_path, percentage):
    """Return a copy of an image with randomly chosen pixels set to black.

    Works for images of any size: the original height and width are kept
    instead of assuming one fixed resolution.

    Args:
        image_path: Path of the image file to read.
        percentage: Fraction of the pixel count (e.g. 0.4) that determines how
            many random draws are made. Pixels are drawn with replacement, so
            the same pixel can be picked more than once and the share of
            pixels that actually end up black is somewhat lower than this
            value.

    Returns:
        A PIL image in RGB order with the selected pixels set to (0, 0, 0).
        All other pixels keep their exact original values.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, which would otherwise fail later with an
        # opaque OpenCV error.
        raise OSError(f"Could not read image: {image_path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Flatten to a list of pixels so one random index selects one pixel.
    # The data stays uint8: no float round trip is needed to write zeros.
    original_shape = img.shape
    pixels = img.reshape(-1, original_shape[-1])
    pixel_count = len(pixels)
    for _ in range(int(pixel_count * percentage)):
        pixels[random.randint(0, pixel_count - 1)] = 0

    # Restore the image's own dimensions rather than a hard-coded size.
    return Image.fromarray(pixels.reshape(original_shape))

# blacken randomly chosen pixels of ./random_images/llama.jpg
# (0.4 => the number of random draws is 40% of the pixel count)
# NOTE: no random seed is set in the visible cells, so the modified image and
# its prediction will differ from run to run.
img = random_black_pixels("./random_images/llama.jpg", 0.4)

# save the image
img.save("./modified_images/llama.jpg")

# get the prediction for the modified image
print(predictor.get_prediction("./modified_images/llama.jpg"))


[('n02417914', 'ibex', 0.47466332), ('n02437616', 'llama', 0.40100285), ('n02326432', 'hare', 0.07232786), ('n02415577', 'bighorn', 0.009352403), ('n02423022', 'gazelle', 0.009291524), ('n02412080', 'ram', 0.008976868), ('n01877812', 'wallaby', 0.007323675), ('n02396427', 'wild_boar', 0.0011894159), ('n02056570', 'king_penguin', 0.00076480047), ('n02127052', 'lynx', 0.00073175586)]


Now the llama is being classified as an ibex, which is weird because we can still identify the llama in the image. This is a case where the top-1 prediction was flipped by making changes to the image.

In [20]:
# Brighten an image by multiplying every R, G and B value by a constant factor.

def double_rgb_values(image_path, factor=3):
    """Return a brightened copy of an image.

    Every channel of every pixel is multiplied by ``factor`` and then clipped
    to the valid range, so bright areas saturate to white.

    NOTE: despite the function name, the default factor is 3 (not 2). That is
    the value this function has always used, and it is kept as the default so
    existing calls produce the same image as before. Pass ``factor=2`` to
    actually double the values.

    Args:
        image_path: Path of the image file to read.
        factor: Multiplier applied to all RGB values.

    Returns:
        A PIL image in RGB order with the scaled, clipped pixel values.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        raise OSError(f"Could not read image: {image_path!r}")

    # Convert color space from BGR to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Normalize the image to 0-1 range and convert to float32 for the multiplication
    img = (img / 255.0).astype(np.float32)

    # Scale all the RGB values and keep the result within [0, 1]
    img = np.clip(img * factor, 0, 1)

    # Convert back to 0-255 range and to uint8. The values are already within
    # [0, 1], so no further clipping is needed here.
    img = (img * 255.0).astype(np.uint8)

    # Convert the numpy array back to a PIL image
    return Image.fromarray(img)

# brighten the truck image (every RGB value is multiplied by 3 and clipped)
img = double_rgb_values("./random_images/truck.jpg")

# save the image
img.save("./modified_images/truck.jpg")

# get the prediction for the modified image
print(predictor.get_prediction("./modified_images/truck.jpg"))


[('n03796401', 'moving_van', 0.37529966), ('n03417042', 'garbage_truck', 0.15808234), ('n02701002', 'ambulance', 0.1555137), ('n04467665', 'trailer_truck', 0.061914496), ('n03345487', 'fire_engine', 0.05397009), ('n03769881', 'minibus', 0.04172391), ('n04065272', 'recreational_vehicle', 0.032933004), ('n03977966', 'police_van', 0.020025576), ('n04461696', 'tow_truck', 0.017693216), ('n03776460', 'mobile_home', 0.014476869)]


This is now being classified as a moving_van, but we can still see it's a truck.

In [27]:
# Function that rotates the image clockwise by a multiple of 90 degrees
def rotate_image(image_path, degrees=90):
    """Return a copy of an image rotated clockwise in 90-degree steps.

    Args:
        image_path: Path of the image file to read.
        degrees: Clockwise rotation angle. Must be a multiple of 90; values
            outside 0-359 wrap around (e.g. -90 is treated as 270). Defaults
            to 90, which is the rotation this function always applied before
            the parameter existed.

    Returns:
        A PIL image in RGB order containing the rotated image.

    Raises:
        ValueError: If ``degrees`` is not a multiple of 90.
        OSError: If the image cannot be read from ``image_path``.
    """
    # cv2.rotate only supports quarter turns, each selected by its own flag.
    quarter_turns = {
        90: cv2.ROTATE_90_CLOCKWISE,
        180: cv2.ROTATE_180,
        270: cv2.ROTATE_90_COUNTERCLOCKWISE,
    }
    angle = degrees % 360
    if angle != 0 and angle not in quarter_turns:
        raise ValueError(f"degrees must be a multiple of 90, got {degrees!r}")

    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        raise OSError(f"Could not read image: {image_path!r}")

    # Rotate the image (a 0-degree rotation leaves it untouched)
    if angle != 0:
        img = cv2.rotate(img, quarter_turns[angle])

    # Convert the image to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Convert the numpy array back to a PIL image
    return Image.fromarray(img)

# rotate the ImageNet validation image by 90 degrees clockwise
# NOTE: the output file is named "rotated_truck", but the recorded output below
# classifies the original image as lawn_mower.
img = rotate_image("./imagenet_images/ILSVRC2012_val_00005053.JPEG")

# save the image
img.save("./modified_images/rotated_truck.jpg")

# get the prediction for the modified image, then for the original image for comparison
print(predictor.get_prediction("./modified_images/rotated_truck.jpg"))
print("\n\n\n")
print("original_image", predictor.get_prediction("./imagenet_images/ILSVRC2012_val_00005053.JPEG"))

[('n03000684', 'chain_saw', 0.37360898), ('n02226429', 'grasshopper', 0.1234823), ('n02236044', 'mantis', 0.077015236), ('n02168699', 'long-horned_beetle', 0.06346717), ('n02231487', 'walking_stick', 0.04875064), ('n02229544', 'cricket', 0.040888287), ('n03649909', 'lawn_mower', 0.02872657), ('n02167151', 'ground_beetle', 0.028077086), ('n02165105', 'tiger_beetle', 0.027187046), ('n02233338', 'cockroach', 0.014033518)]




original_image [('n03649909', 'lawn_mower', 0.93379766), ('n03967562', 'plow', 0.008322287), ('n04465501', 'tractor', 0.007575278), ('n02002556', 'white_stork', 0.005605583), ('n04482393', 'tricycle', 0.004658327), ('n04254680', 'soccer_ball', 0.0039846017), ('n03000684', 'chain_saw', 0.0032816522), ('n02797295', 'barrow', 0.0023677172), ('n04371774', 'swing', 0.0022201233), ('n03134739', 'croquet_ball', 0.002172703)]


In [25]:
# Function that mirrors the image horizontally (left <-> right)
def mirror_image(image_path):
    """Return a horizontally mirrored copy of an image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A PIL image in RGB order, flipped around its vertical axis.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is
        # missing or cannot be decoded.
        raise OSError(f"Could not read image: {image_path!r}")

    # Mirror the image (flip code 1 = flip around the vertical axis)
    img = cv2.flip(img, 1)

    # Convert the image to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Convert the numpy array back to a PIL image
    return Image.fromarray(img)

# mirror the truck image horizontally
img = mirror_image("./random_images/truck.jpg")

# save the image
img.save("./modified_images/mirrored_truck.jpg")

# get the prediction for the modified image (compare with the unmodified truck prediction earlier in the notebook)
print(predictor.get_prediction("./modified_images/mirrored_truck.jpg"))

[('n04467665', 'trailer_truck', 0.45895877), ('n03796401', 'moving_van', 0.2885381), ('n03417042', 'garbage_truck', 0.14200899), ('n04065272', 'recreational_vehicle', 0.05702225), ('n02701002', 'ambulance', 0.007968304), ('n03769881', 'minibus', 0.007016168), ('n03776460', 'mobile_home', 0.006815996), ('n04461696', 'tow_truck', 0.006362023), ('n03345487', 'fire_engine', 0.0056364345), ('n03445924', 'golfcart', 0.0026690678)]
