In [1]:

import os
import shutil
import random    # shuffling images randomnly

# Dataset locations. These are absolute, machine-specific paths: edit them
# before running the notebook on another machine.
#   SOURCE_DIR - read from; holds one sub-folder per category.
#   BASE_DIR   - written to; receives <BASE_DIR>/<split>/<category>/ folders.
SOURCE_DIR = r'C:\Users\praveen agarwal\Desktop\DFproject\data\raw_data'
BASE_DIR = r'C:\Users\praveen agarwal\Desktop\DFproject\data\Data_Split'

# Fraction of each category's files assigned to each split.
# The train and val boundaries are computed from train_split and val_split;
# the test split receives the files that remain after those two slices.
train_split = 0.7
val_split = 0.2
test_split = 0.1

# Split the raw dataset into train/val/test folders.
#
# Reads  <SOURCE_DIR>/<Category>/  and copies every regular file found there
# into <BASE_DIR>/<split>/<category>/.  Destination folders use the
# lower-cased category name so they match the 'real'/'fake' folder names the
# preprocessing cells read back; on a case-insensitive filesystem (Windows)
# this resolves to the same directory as 'Real'/'Fake'.
#
# NOTE: files are copied, never removed. If the raw data changes between runs,
# clear BASE_DIR first so an image cannot end up in two different splits.
raw_categories = ['Real', 'Fake']
split_names = ['train', 'val', 'test']

# The slicing below gives the test split whatever train and val leave over,
# so the three configured ratios are only meaningful if they add up to 1.
ratio_sum = train_split + val_split + test_split
if abs(ratio_sum - 1.0) > 1e-9:
    raise ValueError(
        f"train_split + val_split + test_split must equal 1.0, got {ratio_sum}"
    )

# Create directory structure automatically
for split in split_names:
    for category in raw_categories:
        os.makedirs(os.path.join(BASE_DIR, split, category.lower()), exist_ok=True)

# Loop through each category and split the images
random.seed(42)  # For reproducibility
for category in raw_categories:
    src_folder = os.path.join(SOURCE_DIR, category)

    # os.listdir returns entries in arbitrary order, so the listing is sorted
    # before the seeded shuffle; otherwise the same seed could still produce a
    # different split on another machine. Sub-directories are skipped because
    # shutil.copy2 cannot copy them.
    images = sorted(
        name for name in os.listdir(src_folder)
        if os.path.isfile(os.path.join(src_folder, name))
    )
    random.shuffle(images)

    total = len(images)
    train_end = int(total * train_split)
    val_end = train_end + int(total * val_split)

    train_files = images[:train_end]
    val_files = images[train_end:val_end]
    test_files = images[val_end:]   # remainder, absorbs any rounding loss

    # Copy files into new folders (copy2 also preserves file metadata)
    for split, file_list in zip(split_names, [train_files, val_files, test_files]):
        dst_folder = os.path.join(BASE_DIR, split, category.lower())
        for file_name in file_list:
            shutil.copy2(
                os.path.join(src_folder, file_name),
                os.path.join(dst_folder, file_name),
            )

print(" Dataset has been split and organized successfully!")


 Dataset has been split and organized successfully!


In [None]:
import numpy as np
import cv2
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator   


In [None]:
# Define directories
# INPUT_DIR is the <split>/<class> tree that the preprocessing loop reads
# (the same path the split step writes to as BASE_DIR); OUTPUT_DIR receives
# the preprocessed copies in the same <split>/<class> layout.
INPUT_DIR = r'C:\Users\praveen agarwal\Desktop\DFproject\data\Data_Split'
OUTPUT_DIR = r'C:\Users\praveen agarwal\Desktop\DFproject\data\Data_Preprocessed'

# Define image target size
# Passed to cv2.resize as dsize, which OpenCV interprets as (width, height).
TARGET_SIZE = (224, 224)  # 224x224 is a common input size for CNNs


In [None]:
# Define Preprocessing Function

def preprocess_image(image_path, target_size=(224, 224), grayscale=False, rescale=True):
    """Load an image from disk, fix its colour order, resize and optionally rescale it.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        target_size: Output size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.
        grayscale: If True, return a single-channel image with an explicit
            trailing channel axis instead of a 3-channel RGB image.
        rescale: If True, convert to float32 and divide by 255 so values lie
            in [0, 1]; otherwise the array keeps the dtype ``cv2`` produced.

    Returns:
        ``numpy.ndarray`` of shape ``(height, width, 3)`` in RGB order, or
        ``(height, width, 1)`` when ``grayscale`` is True.

    Raises:
        ValueError: If ``cv2.imread`` returns None (file missing or not
            decodable as an image).
    """
    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Image not found at {image_path}")

    # cv2 loads BGR by default: convert to the requested colour space
    if grayscale:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize
    img = cv2.resize(img, target_size)

    # Add the channel axis only AFTER resizing: cv2.resize returns a 2-D array
    # for single-channel input, so an axis added earlier would be dropped.
    if grayscale:
        img = np.expand_dims(img, axis=-1)  # Keep 3D shape (H, W, 1)

    # Rescale pixel values to [0,1]
    if rescale:
        img = img.astype('float32') / 255.0

    return img


In [None]:
# Save Preprocessed Images
def save_preprocessed_image(image_array, output_path):
    """Write a preprocessed image array to disk with ``cv2.imwrite``.

    Args:
        image_array: Image as produced by ``preprocess_image``: either a float
            array scaled to [0, 1] or an integer array already in 0-255.
            Shape ``(H, W, 3)`` is interpreted as RGB, ``(H, W, 1)`` or
            ``(H, W)`` as grayscale.
        output_path: Destination file path; the extension selects the encoder.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written.
    """
    pixels = np.asarray(image_array)

    if np.issubdtype(pixels.dtype, np.integer):
        # Already on the 0-255 scale (e.g. rescale=False); multiplying by 255
        # would overflow.
        img = np.clip(pixels, 0, 255).astype(np.uint8)
    else:
        # Convert float values back to 0-255. Round instead of truncating so
        # that x / 255 * 255 maps back to x, and clip so out-of-range values
        # saturate instead of wrapping around.
        img = np.clip(np.rint(pixels * 255.0), 0, 255).astype(np.uint8)

    if img.ndim == 3 and img.shape[2] == 1:
        # Handle grayscale: drop the channel dimension
        img = img[:, :, 0]
    elif img.ndim == 3 and img.shape[2] == 3:
        # cv2.imwrite expects BGR channel order; the preprocessed array is RGB.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # imwrite signals failure through its return value rather than raising.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Could not write image to {output_path}")


In [None]:
# Create Output Directory Structure
def create_output_dirs(base_dir, splits=['train', 'val', 'test'], classes=['real', 'fake']):
    """Create ``<base_dir>/<split>/<class>`` for every split/class combination.

    Directories that already exist are left untouched, so the call can be
    repeated safely.

    Args:
        base_dir: Root directory under which the tree is created.
        splits: Names of the first-level folders.
        classes: Names of the folders created inside each split folder.
    """
    targets = [
        os.path.join(base_dir, split_name, class_name)
        for split_name in splits
        for class_name in classes
    ]
    for target in targets:
        os.makedirs(target, exist_ok=True)
 

In [7]:
# Preprocess the entire dataset: every image under INPUT_DIR/<split>/<class>
# is loaded, resized to TARGET_SIZE and written to the matching folder under
# OUTPUT_DIR with the same file name.
create_output_dirs(OUTPUT_DIR)

# Files that could not be decoded; they are reported after the run instead of
# aborting a long batch job part-way through.
unreadable_files = []

# Loop through splits and classes
for split in ['train', 'val', 'test']:
    for cls in ['real', 'fake']:
        input_folder = os.path.join(INPUT_DIR, split, cls)
        output_folder = os.path.join(OUTPUT_DIR, split, cls)

        # Sorted for a stable processing order; sub-directories are skipped
        # because they cannot be read as images.
        image_files = sorted(
            name for name in os.listdir(input_folder)
            if os.path.isfile(os.path.join(input_folder, name))
        )

        for img_name in tqdm(image_files, desc=f'Processing {split}/{cls}'):
            img_path = os.path.join(input_folder, img_name)
            try:
                preprocessed_img = preprocess_image(img_path, target_size=TARGET_SIZE, grayscale=False)
            except ValueError:
                # preprocess_image raises ValueError when the file cannot be read
                unreadable_files.append(img_path)
                continue
            output_path = os.path.join(output_folder, img_name)
            save_preprocessed_image(preprocessed_img, output_path)

if unreadable_files:
    print(f"Skipped {len(unreadable_files)} unreadable file(s):")
    for skipped_path in unreadable_files:
        print(f"  {skipped_path}")


Processing train/real: 100%|██████████| 3500/3500 [01:06<00:00, 52.76it/s]
Processing train/fake: 100%|██████████| 3500/3500 [01:13<00:00, 47.86it/s]
Processing val/real: 100%|██████████| 1000/1000 [00:20<00:00, 47.89it/s]
Processing val/fake: 100%|██████████| 1000/1000 [00:20<00:00, 49.77it/s]
Processing test/real: 100%|██████████| 500/500 [00:09<00:00, 51.07it/s]
Processing test/fake: 100%|██████████| 500/500 [00:10<00:00, 48.80it/s]


In [None]:

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation
#
# Training images are randomly perturbed on every pass; validation images
# are only rescaled so that evaluation stays deterministic.

datagen_train = ImageDataGenerator(
    # Geometric transforms
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest',          # how pixels exposed by a transform are filled
    # Photometric transforms
    brightness_range=[0.7, 1.3],
    channel_shift_range=20.0,
    # Scale pixel values from 0-255 down to 0-1
    rescale=1.0 / 255,
)

datagen_val = ImageDataGenerator(rescale=1.0 / 255)


: 