# Project Setup

## Installing Required Libraries

Installing the libraries required by this project

In [3]:
!pip install tensorflow
!pip install tensorflow-hub
!pip install scikit-learn
!pip install keras-tuner
! pip install numpy
! pip install pandas
!pip install seaborn 
!pip install matplotlib

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


# Feature Extraction

## Data Preprocessing

It was noted that some of the folders, in particular the art style ukiyo e had different names for the folders across the Latend Diffusion, Stable DIffusion and Human Samples. Considerations we made and the folders were renamed "ukiyo-e" across the test and train samples to help with the functions created to auto train and test

In [49]:
import tensorflow as tf
from tensorflow import keras
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image_dataset_from_directory
import numpy as np
import pandas as pd
from keras.applications.vgg16 import preprocess_input as vgg_preprocessor
from keras.applications.resnet50 import preprocess_input as resnet_preprocessor
from pathlib import Path
import shutil
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.layers import Rescaling, Conv2D, GlobalAveragePooling2D,MaxPooling2D, Flatten, Dense, BatchNormalization, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
img_height = 256
img_width = 256

DATASET_SIZE = 500

Creating a reference to the base location of all the files

In [23]:
import os #used in this instance to get the relative location of the notebook

#set the base directory NB windows using relative paths causes errors
BASE_LOC = os.path.join(os.getcwd(), 'Real_AI_SD_LD_Dataset')
TRAIN_DATA_PATH = os.path.join(BASE_LOC, 'temp_train')
TEST_DATA_PATH = os.path.join(BASE_LOC, 'temp_test')

Creating a filtered directory with only data for an Artstyle and an AI generator

In [20]:
# def copy_dir(src,dest):
#   if Path(dest).exists():
#     shutil.rmtree(dest)
#   shutil.copytree(src,dest)

def copy_dir(src, dest):
    if Path(dest).exists():
        shutil.rmtree(dest)

    os.makedirs(dest, exist_ok=True)

    files_copied = 0
    for root, dirs, files in os.walk(src):
        for file in files:
            src_file_path = os.path.join(root, file)
            dest_file_path = os.path.join(dest, os.path.relpath(src_file_path, src))
            os.makedirs(os.path.dirname(dest_file_path), exist_ok=True)
            shutil.copy(src_file_path, dest_file_path)
            files_copied += 1

            if DATASET_SIZE  is not None and files_copied >= DATASET_SIZE :
                return

def filter_imageset(ai,art_style):
  ai_src = f"AI_{ai}_{art_style}"

  #temp directories /content/Real_AI_SD_LD_Dataset/train/AI_LD_art_nouveau

  train_ai_folder = os.path.join(TRAIN_DATA_PATH,ai_src) #destination folder in temp_train for AI images
  test_ai_folder = os.path.join(TEST_DATA_PATH,ai_src) #destination folder in temp_test for AI images

  train_human_folder = os.path.join(TRAIN_DATA_PATH,art_style) #destination folder in temp_train for Human images
  test_human_folder = os.path.join(TEST_DATA_PATH,art_style) #destination folder in temp_test for Human images

  train_dir = os.path.join(BASE_LOC,"train") #directory in \Real_AI_SD_LD_Dataset that contains training images
  test_dir = os.path.join(BASE_LOC,"test") #directory in \Real_AI_SD_LD_Dataset that contains testing images

  #copying AI images
  copy_dir(
    os.path.join(train_dir,ai_src),
    train_ai_folder
  )
  copy_dir(
    os.path.join(test_dir,ai_src),
    test_ai_folder
  )

  #copying Human images
  copy_dir(
    os.path.join(train_dir,art_style),
    train_human_folder
  )
  copy_dir(
    os.path.join(test_dir,art_style),
    test_human_folder
  )
  

### Preprocessing Function

In [32]:
def generic_preprocessor(image, label):
    image = tf.image.resize(image, [img_height, img_width])
    image = image / 255.0
    image = tf.image.random_rotation(image, 7)
    
    image = tf.image.random_flip_left_right(image)
    
    zoom_factor = tf.random.uniform([], 0.8, 1.2) 
    image = tf.image.resize(image, [img_height*zoom_factor, img_width*zoom_factor])
    image = tf.image.resize_with_crop_or_pad(image, img_height, img_width)
    
    return image, label

def get_image_data(ai,art_style):
      filter_imageset(ai,art_style)
      ai_src = f"AI_{ai}_{art_style}"
      train = validate = test = None
      classes = [ai_src, art_style]
      train = image_dataset_from_directory(
            TRAIN_DATA_PATH,
            validation_split=0.2,
            label_mode="binary",
            labels = 'inferred',
            class_names=classes,
            subset = "training",
            seed=64,
            image_size = (img_height, img_width),
            batch_size = 32,
      )
      validate = image_dataset_from_directory(
            TRAIN_DATA_PATH,
            validation_split=0.2,
            label_mode="binary",
            labels = 'inferred',
            class_names=classes,
            subset = "validation",
            seed=64,
            image_size = (img_height, img_width),
            batch_size = 32,
      )
      test = image_dataset_from_directory(
            TEST_DATA_PATH,
            label_mode="binary",
            labels="inferred",
            class_names=classes,
            seed=64,
            image_size = (img_height, img_width),
            batch_size = 32,
      )
      AUTOTUNE = tf.data.AUTOTUNE

      train = train.cache().shuffle(500).prefetch(buffer_size=AUTOTUNE)
      validate = validate.cache().prefetch(buffer_size=AUTOTUNE)
      test = test.cache().prefetch(buffer_size=AUTOTUNE)


      return train,validate,test

# Individual Classifier Models

In [82]:
NUM_EPOCHS = 5
WEIGHT = 15
def wicm(y_true, y_pred):
    true_positives = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(tf.round(y_pred), 1)), tf.float32))
    true_negatives = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 0), tf.equal(tf.round(y_pred), 0)), tf.float32))
    false_positives = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 0), tf.equal(tf.round(y_pred), 1)), tf.float32))
    false_negatives = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(y_true, 1), tf.equal(tf.round(y_pred), 0)), tf.float32))
    
    metric = -((false_positives + (false_negatives * WEIGHT)) / (true_positives + true_negatives + false_positives + false_negatives))
    return metric

def train_model(model_in,train_ds,val_ds,model_name):
  model = Sequential([
    model_in,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid')
  ])
  model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.BinaryAccuracy(threshold=0.5, name='accuracy'),
        keras.metrics.FalseNegatives(),
        keras.metrics.AUC(),
        wicm
    ]
)
  checkpoint_filepath = f"{BASE_LOC}_{model_name}_checkpoint.keras"
  early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
  checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)
  
  return model.fit(
      train_ds,
      epochs=NUM_EPOCHS,
      validation_data=val_ds,
      callbacks=[early_stopping,checkpoint]
  ),model

## VGG16

In [83]:
vgg = VGG16(weights='imagenet',include_top=False,input_shape=(img_height, img_width, 3))

for layer in vgg.layers:
    layer.trainable = False
train,val,test = get_image_data(
        ai="LD",
        art_style="art_nouveau"
    )
histv,trainedvgg = train_model(vgg,train,val,'vgg')

Found 1000 files belonging to 2 classes.
Using 800 files for training.
Found 1000 files belonging to 2 classes.
Using 200 files for validation.
Found 1000 files belonging to 2 classes.
Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8544 - auc_15: 0.9141 - false_negatives_15: 24.8800 - loss: 0.4891 - wicm: -1.3393
Epoch 1: val_accuracy improved from -inf to 0.98000, saving model to c:\Users\Chimera\Desktop\UWI\AI Development\AI art detector Project\Proj 3610\Code\COMP3610-GROUPM\Real_AI_SD_LD_Dataset_vgg_checkpoint.keras
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 4s/step - accuracy: 0.8573 - auc_15: 0.9165 - false_negatives_15: 25.0769 - loss: 0.4795 - wicm: -1.3106 - val_accuracy: 0.9800 - val_auc_15: 0.9901 - val_false_negatives_15: 0.0000e+00 - val_loss: 0.1191 - val_wicm: -0.0179
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9937 - auc_15: 0.9992 - false_negative

## ResNet-50

In [84]:
resnet = ResNet50(weights='imagenet',include_top=False,input_shape=(img_height, img_width, 3))

for layer in resnet.layers:
    layer.trainable = False
train,val,test = get_image_data(
        ai="LD",
        art_style="art_nouveau"
    )

histr,trainedresnet = train_model(resnet,train,val,'resnet50')

Found 1000 files belonging to 2 classes.
Using 800 files for training.
Found 1000 files belonging to 2 classes.
Using 200 files for validation.
Found 1000 files belonging to 2 classes.
Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7909 - auc_16: 0.8793 - false_negatives_16: 29.9600 - loss: 0.4701 - wicm: -1.6632
Epoch 1: val_accuracy improved from -inf to 0.98500, saving model to c:\Users\Chimera\Desktop\UWI\AI Development\AI art detector Project\Proj 3610\Code\COMP3610-GROUPM\Real_AI_SD_LD_Dataset_resnet50_checkpoint.keras
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 3s/step - accuracy: 0.7958 - auc_16: 0.8833 - false_negatives_16: 30.1923 - loss: 0.4601 - wicm: -1.6266 - val_accuracy: 0.9850 - val_auc_16: 0.9946 - val_false_negatives_16: 0.0000e+00 - val_loss: 0.0554 - val_wicm: -0.0134
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9926 - auc_16: 0.9999 - false_nega

## Simple CNN

In [None]:

def CNN_img_ai_or_hum(filters=64, kernel_size=3, input_shape=(256, 256, 3), dense_units=64, output_units=1, activation='relu'):
    model = Sequential([
        Conv2D(filters=filters, kernel_size=kernel_size, activation=activation, input_shape=input_shape),
        MaxPooling2D(),
        Conv2D(filters=filters, kernel_size=kernel_size, activation=activation),
        MaxPooling2D(),
        Flatten(),
        Dense(units=dense_units, activation=activation),
        Dropout(0.5),
        Dense(units=output_units, activation='sigmoid')
    ])
    return model

cnn = CNN_img_ai_or_hum()
trained_cnn_vgg = train_model(cnn,train,val,'simple_vgg')
trained_cnn_resnet = train_model(cnn,train,val,'simple_resnet50')

In [86]:
true_labels = []
for images, labels in test:
    true_labels.extend(labels.numpy())

def evalmodel(model):
    evaluation_result = model.evaluate(test)
    print("Test Loss:", evaluation_result[0])
    print("Test Accuracy:", evaluation_result[1])
    print("False Negatives:", evaluation_result[2])
    print("AUC:", evaluation_result[3])
evalmodel(trainedvgg)
evalmodel(trainedresnet)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 3s/step - accuracy: 0.9919 - auc_15: 0.9959 - false_negatives_15: 3.9394 - loss: 0.0454 - wicm: -0.1008
Test Loss: 0.03557347506284714
Test Accuracy: 0.9929999709129333
False Negatives: 0.9969040155410767
AUC: 6.0
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 2s/step - accuracy: 0.9850 - auc_16: 0.9980 - false_negatives_16: 3.0303 - loss: 0.0503 - wicm: -0.1181
Test Loss: 0.030073510482907295
Test Accuracy: 0.9900000095367432
False Negatives: 0.9986740946769714
AUC: 4.0


In [87]:
vggpredict = trainedvgg.predict(test)
respredict = trainedresnet.predict(test)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 3s/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 2s/step


In [89]:
!pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable


In [90]:
from sklearn.metrics import f1_score
weight_resnet = 0.7
weight_vgg16 = 0.3
weighted_predictions = (weight_resnet * respredict) + (weight_vgg16 * vggpredict )
ensemble_predictions = np.argmax(weighted_predictions, axis=1)
f1 = f1_score(true_labels, ensemble_predictions, average='macro')

print("Weighted Voting Classifier F1 Score:", f1)

Weighted Voting Classifier F1 Score: 0.3333333333333333
