Connecting to Google Drive

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be accessed through that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

# Switch the working directory to the dataset root on Drive, then print the
# current directory and list its contents as a quick sanity check.
%cd /content/drive/MyDrive/CIFAKE_DATASET/
!pwd
!ls

/content/drive/MyDrive/CIFAKE_DATASET
/content/drive/MyDrive/CIFAKE_DATASET
test  train  validation


Installing Transformers

In [3]:
# Install necessary libraries
!pip install transformers



In [4]:
import os
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import torch
from PIL import Image
from transformers import pipeline

# Initialize the SDXL detector pipeline.
# device=0 selects the first CUDA GPU; fall back to the CPU (-1) when no GPU
# is visible so this cell does not crash on a CPU-only runtime.
detector = pipeline(
    "image-classification",
    model="Organika/sdxl-detector",
    device=0 if torch.cuda.is_available() else -1,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/347M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/337 [00:00<?, ?B/s]

In [15]:
import tensorflow as tf
import os
import numpy as np

# Root folder of the test split; its 'REAL' and 'FAKE' subfolders are joined
# onto this path further down in this cell.
test_dir = '/content/drive/MyDrive/CIFAKE_DATASET/test'

# Part 1: Define a function to load and preprocess images using TensorFlow
def load_and_preprocess_image(image_path, target_size=(32, 32)):
    """Read an image file and return it as a normalized float tensor.

    Args:
        image_path: Path of the image file (Python string or scalar string
            tensor).
        target_size: ``(height, width)`` the decoded image is resized to.
            Defaults to ``(32, 32)``.

    Returns:
        A float tensor of shape ``(height, width, 3)`` with values scaled to
        the [0, 1] range.
    """
    raw_bytes = tf.io.read_file(image_path)
    # decode_image detects the actual file format, so it matches the mix of
    # .png/.jpg/.jpeg files collected by the dataset builder.
    # expand_animations=False keeps the result 3-D with a known rank, which
    # tf.image.resize requires when this function runs inside Dataset.map.
    image = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    image = tf.image.resize(image, target_size)  # Resize to the required input size
    return image / 255.0  # Normalize 0-255 pixel values to [0, 1]

# Convert TensorFlow tensor to PIL Image
def tf_to_pil(image_tensor):
    """Turn a TensorFlow image tensor into a PIL image.

    The tensor is converted to uint8 with ``tf.image.convert_image_dtype`` and
    the resulting NumPy array is handed to ``Image.fromarray``.
    """
    as_uint8 = tf.image.convert_image_dtype(image_tensor, dtype=tf.uint8)
    return Image.fromarray(as_uint8.numpy())

# Part 2: Create TensorFlow Datasets for 'REAL' and 'FAKE' images
def create_image_dataset(image_folder, label, batch_size=32):
    """Build a batched ``tf.data.Dataset`` of preprocessed images from a folder.

    Args:
        image_folder: Directory whose .png/.jpg/.jpeg files (extension matched
            case-insensitively) are loaded.
        label: Label attached to every image found in the folder.
        batch_size: Number of images per batch. Defaults to 32; adjust
            depending on available memory.

    Returns:
        A dataset yielding ``(images, labels)`` batches, where each image is
        the output of ``load_and_preprocess_image``.
    """
    # Sort the paths so iteration order does not depend on the arbitrary
    # order in which os.listdir returns directory entries.
    image_paths = sorted(
        os.path.join(image_folder, fname)
        for fname in os.listdir(image_folder)
        if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    labels = [label] * len(image_paths)

    # Force a string dtype: an empty Python list would otherwise be inferred
    # as float32, and tf.io.read_file would then fail while the map function
    # is traced, even though there is nothing to read.
    path_tensor = tf.constant(image_paths, dtype=tf.string)

    # Create TensorFlow dataset from image paths and labels
    dataset = tf.data.Dataset.from_tensor_slices((path_tensor, labels))

    # Load and preprocess images in parallel; map keeps element order
    # deterministic by default, so results match the serial version.
    dataset = dataset.map(
        lambda path, lbl: (load_and_preprocess_image(path), lbl),
        num_parallel_calls=tf.data.AUTOTUNE,
    )

    # Batch for faster processing and prefetch so file reads overlap with
    # whatever consumes the batches.
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Part 3: Create datasets for 'REAL' and 'FAKE' folders
# Locate the per-class folders inside the test split.
real_folder, fake_folder = (
    os.path.join(test_dir, class_dir) for class_dir in ('REAL', 'FAKE')
)

# Build one dataset per folder, labelling every image in it with a single
# class name.
real_dataset = create_image_dataset(real_folder, 'human')
fake_dataset = create_image_dataset(fake_folder, 'artificial')

# Part 4: Define function to calculate accuracy using TensorFlow Dataset
def calculate_accuracy(dataset, expected_class):
    """Return the percentage of images the detector assigns to ``expected_class``.

    An image counts as correct only when both the detector's top label
    (lower-cased) and the dataset label equal ``expected_class``. Images whose
    dataset label differs still count toward the total.

    Args:
        dataset: Iterable of ``(images, labels)`` batches, as produced by
            ``create_image_dataset``.
        expected_class: Class name to score against, e.g. ``'human'`` or
            ``'artificial'``.

    Returns:
        Accuracy as a percentage in [0, 100]; 0 when the dataset is empty.
    """
    correct_predictions = 0
    total_images = 0

    for images, labels in dataset:
        # Convert TensorFlow tensors to PIL images before passing to the detector
        pil_images = [tf_to_pil(image) for image in images]

        # String tensors come back from .numpy() as bytes; decode them once so
        # the comparison below is a plain str == str check rather than an
        # implicit tensor comparison coerced to bool.
        label_names = [
            raw.decode('utf-8') if isinstance(raw, bytes) else raw
            for raw in labels.numpy()
        ]

        # Use the SDXL detector for predictions. Passing the whole batch in a
        # single call avoids one pipeline invocation per image; the result is
        # one list of label/score dicts per input image.
        predictions = detector(pil_images, batch_size=len(pil_images))

        for prediction, label in zip(predictions, label_names):
            # The first entry of each per-image result is taken as the
            # predicted class, as before.
            predicted_class = prediction[0]['label'].lower()
            if predicted_class == expected_class and label == expected_class:
                correct_predictions += 1
            total_images += 1

    # Calculate accuracy as percentage
    accuracy = (correct_predictions / total_images) * 100 if total_images > 0 else 0
    return accuracy


In [16]:
# Part 5: Evaluate the Model on 'REAL' and 'FAKE' Images
# Each call runs the detector over one folder's dataset and returns the
# percentage of images assigned to the given expected class. The result is
# printed right after each call, so the REAL score appears before the FAKE
# evaluation starts.
real_accuracy = calculate_accuracy(real_dataset, expected_class='human')
print(f'Accuracy for REAL images (human class): {real_accuracy:.2f}%')

fake_accuracy = calculate_accuracy(fake_dataset, expected_class='artificial')
print(f'Accuracy for FAKE images (artificial class): {fake_accuracy:.2f}%')

Accuracy for REAL images (human class): 87.69%
Accuracy for FAKE images (artificial class): 27.49%
