In [26]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
import tensorflow as tf
physical_device = tf.config.experimental.list_physical_devices('GPU')
print(f'Device found : {physical_device}')

Device found : [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [28]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
from skimage.util import view_as_windows
from scipy.fftpack import dct
import cv2
import os



In [29]:
def Discrete_cosine_transform(type_of_sub_image_blocks):
  """
  Performs Discrete Cosine Transform (DCT) on each sub-image block and calculates features for forgery detection.

  Args:
    type_of_sub_image_blocks: List of sub-image blocks.

  Returns:
    Tuple containing:
      - Standard deviation of AC coefficients across all blocks.
      - Number of non-zero AC coefficients in each block.
  """

  AC_Discrete_cosine_transform_stack = []  # Stores AC coefficients for all blocks
  number_of_ones = []                      # Stores number of non-zero AC coefficients per block

  # Loop through each sub-image block
  for block in type_of_sub_image_blocks:
    # Apply DCT and flatten transformed block
    Discrete_cosine_transform_block = dct(block, type=2, n=None, axis=-1, norm=None, overwrite_x=False)
    Discrete_cosine_transform_block_row = Discrete_cosine_transform_block.flatten()

    # Extract AC coefficients (skip DC coefficient)
    AC_Discrete_cosine_transform = Discrete_cosine_transform_block_row[1:]
    AC_Discrete_cosine_transform_stack.append(AC_Discrete_cosine_transform)

  # Convert AC coefficient stack to NumPy array and transpose for easier calculation
  AC_Discrete_cosine_transform_stack = np.asarray(AC_Discrete_cosine_transform_stack).T

  # Calculate standard deviation of AC coefficients across all blocks (row-wise)
  AC_Discrete_cosine_transform_standard_deviation = np.std(AC_Discrete_cosine_transform_stack, axis=1)

  # Count non-zero AC coefficients in each block
  for i in range(AC_Discrete_cosine_transform_stack.shape[0]):
    count_one = 0
    for j in range(AC_Discrete_cosine_transform_stack.shape[1]):
      if AC_Discrete_cosine_transform_stack[i][j] > 0:
        count_one += 1
    number_of_ones.append(count_one)

  number_of_ones = np.asarray(number_of_ones)

  return AC_Discrete_cosine_transform_standard_deviation, number_of_ones

In [30]:
def get_patches(image_mat):
  """

  Args:
    image_mat: A NumPy array representing the image.

  Returns:
    A list of extracted image patches.
  """

  stride = 8  # Distance between patches (pixels)
  window_shape = (8, 8)  # Size of each patch (height, width)

  # Convert image to NumPy array for efficient processing
  image_mat = np.array(image_mat)

  # Extract overlapping patches using windowing function
  windows = view_as_windows(image_mat, window_shape, step=stride)

  patches = []  # List to store extracted patches

  # Loop through each window and add it to the list
  for m in range(windows.shape[0]):
    for n in range(windows.shape[1]):
      patches.append(windows[m][n])

  return patches



def feature_sub_image(sub_image):
  """


  Args:
    sub_image: A NumPy array representing the sub-image.

  Returns:
    A flattened feature vector for the sub-image.
  """

  sub_image_blocks = get_patches(sub_image)  # Extract patches from entire sub-image

  # Define a smaller cropped region within the sub-image
  sub_image_cropped = sub_image[4:, 4:]  # Skip 4 pixels on each side (adjust as needed)

  sub_image_cropped_blocks = get_patches(sub_image_cropped)  # Extract patches from cropped region

  # Calculate DCT-based features for both sets of patches
  standard_deviation_original_image, non_zero_count_original_image = Discrete_cosine_transform(sub_image_blocks)
  standard_deviation_cropped_image, non_zero_count_cropped_image = Discrete_cosine_transform(sub_image_cropped_blocks)

  # Concatenate features from original and cropped patches into a single vector
  Feature_vector_sub_image = np.column_stack((standard_deviation_original_image, non_zero_count_original_image, standard_deviation_cropped_image, non_zero_count_cropped_image))

  # Flatten the vector for further processing
  Feature_vector_sub_image_flat = Feature_vector_sub_image.T.flatten()

  return Feature_vector_sub_image_flat



In [33]:
def YCrCb(path_to_folder, class_label):
  """
  Extracts features for images in a folder, converting them to YCrCb space and analyzing channels separately.

  Args:
    path_to_folder: Path to the folder containing image files.
    class_label: Label for the images in the folder.

  Returns:
    A list of feature vectors for all images in the folder, including filename and label.
  """

  data_list = []  # Stores feature vectors for all images

  # Loop through each image file in the folder
  for image_name in os.listdir(path_to_folder):
    image_path = os.path.join(path_to_folder, image_name)

    # Read the image and check if it's valid
    image = cv2.imread(image_path)
    if np.shape(image) == ():  # Skip empty images
      continue

    # Convert image to YCrCb color space for improved feature extraction
    image = cv2.cvtColor(image, cv2.COLOR_BGR2YCrCb)

    # Extract separate channels (Y, Cr, Cb)
    Y_channel = image[:, :, 0]

    cr_channel = image[:, :, 1]

    cb_channel = image[:, :, 2]

    # Extract features from each channel using the `feature_sub_image` function
    Feature_vector_y = feature_sub_image(Y_channel)

    Feature_vector_cr = feature_sub_image(cr_channel)

    Feature_vector_cb = feature_sub_image(cb_channel)

    # Concatenate features from all channels into a single vector
    final_feature_vector = np.concatenate((Feature_vector_y, Feature_vector_cb, Feature_vector_cr), axis=None)

    # Convert the final vector to a list and add image name and class label as the first two elements
    final_feature_vector = list(final_feature_vector)

    final_feature_vector.insert(0, image_name)

    final_feature_vector.insert(1, class_label)

    # Append the feature vector to the data list
    data_list.append(final_feature_vector)

  return data_list



In [34]:

# Change the below paths to the dataset path

# Path for authentic images
authentic_images = "/content/drive/MyDrive/Colab Notebooks/TPSP/CASIA 2.0/Au 2"

# Path for Copy-move forgery
Copy_move_images = "/content/drive/MyDrive/Colab Notebooks/TPSP/CASIA 2.0/Tp 2/CM"

# Path for Splicing forgery
Splicing_images = "/content/drive/MyDrive/Colab Notebooks/TPSP/CASIA 2.0/Tp 2/Sp"

output_name='CASIA1_dataset_copy_copy.csv'



In [35]:
# Load authentic and forged image samples with labels
authentic_samples = YCrCb(authentic_images, 0)  # Label 0 for authentic
forged_samples_1 = YCrCb(Copy_move_images, 1)  # Label 1 for copy-move forgeries
forged_samples_2 = YCrCb(Splicing_images, 1)  # Label 1 for splicing forgeries

# Create a Pandas DataFrame from the authentic samples
dataset = pd.DataFrame(authentic_samples)

# Append the first set of forged samples (copy-move) to the dataset
dataset = dataset.append(pd.DataFrame(forged_samples_1), ignore_index=True)

# Append the second set of forged samples (splicing) to the dataset
dataset = dataset.append(pd.DataFrame(forged_samples_2), ignore_index=True)

# Rename columns for clarity
dataset.rename(columns={0: "image_names", 1: "label"}, inplace=True)

# Normalize feature values between 0 and 1 (MinMaxScaler)
scaler_norm = MinMaxScaler()
dataset.iloc[:, 2:] = scaler_norm.fit_transform(dataset.iloc[:, 2:].to_numpy())  # Skip first two columns (name and label)

# Save the final dataset as a CSV file
dataset.to_csv(output_name, index=False)


  dataset = dataset.append(pd.DataFrame(forged_samples_1), ignore_index=True)
  dataset = dataset.append(pd.DataFrame(forged_samples_2), ignore_index=True)


In [36]:
dataset.tail()

Unnamed: 0,image_names,label,2,3,4,5,6,7,8,9,...,748,749,750,751,752,753,754,755,756,757
1956,Tp_D_NRN_S_O_nat10156_ani00036_12019.jpg,1,0.152142,0.152708,0.162188,0.179292,0.147378,0.170464,0.16183,0.474965,...,0.189142,0.284865,0.531214,0.284006,0.190213,0.277583,0.1806,0.286507,0.191612,0.280527
1957,Tp_D_NRN_S_O_nat10159_ani00097_12056.jpg,1,0.105419,0.106233,0.124465,0.150989,0.155706,0.194985,0.229479,0.424609,...,0.110887,0.170541,0.531214,0.179694,0.120886,0.181279,0.106892,0.174903,0.117208,0.166487
1958,Tp_D_NRN_M_N_cha10123_nat10124_12192.jpg,1,0.157903,0.121494,0.11933,0.107279,0.096406,0.101991,0.088433,0.491765,...,0.487727,0.603514,0.747706,0.618359,0.504515,0.59509,0.384812,0.626596,0.477162,0.59064
1959,Tp_D_NRN_M_N_cha10122_nat10144_12155.jpg,1,0.144545,0.099254,0.087436,0.077559,0.069603,0.071925,0.026458,0.436326,...,0.479064,0.574595,0.747706,0.6,0.516458,0.56191,0.396618,0.587729,0.494398,0.561054
1960,Tp_D_NRN_M_N_cha10123_nat10139_12193.jpg,1,0.160741,0.123187,0.120694,0.108253,0.097036,0.102377,0.088805,0.508957,...,0.451054,0.518919,0.747706,0.556606,0.473347,0.514432,0.365667,0.559134,0.448434,0.511296


In [4]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Path for the excel file containing samples. For Copy-move, Splicing or mixed.
df=pd.read_csv('/content/drive/MyDrive/Colab Notebooks/TPSP/CASIA 2.0_Copy_move_features.csv')

array=df.values

x_feature=array[:,2:]

y_label=array[:,1].astype('int')


(1408, 756)
(1408,)


In [7]:
x_feature.shape

(1408, 756)

## Principal component anaylsis

In [5]:
from sklearn.decomposition import PCA
pca = PCA(n_components='mle', svd_solver='full')
x_transformed_feature=pca.fit_transform(x_feature)
n_components = pca.n_components_

X_train,X_test,Y_train,Y_test=train_test_split(x_transformed_feature,y_label,test_size=0.30,random_state=7,stratify=y_label)

In [6]:
x_transformed_feature.shape

(1408, 730)

In [9]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-23.9.7-py3-none-any.whl (23 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.9.7 scikit-optimize-0.9.0


## Bayesian Optimization with K-fold cross validation

In [10]:
from skopt import BayesSearchCV
from sklearn.svm import SVC

# Define the parameter search space
param_space = {
    'C': (1, 1000),
    'gamma': (1e-4, 1),
    'kernel': ['rbf']
}

# Create the SVM model
model_SVC = SVC()

# Define the Bayesian Optimization search
bayes_search = BayesSearchCV(
    estimator=model_SVC,
    search_spaces=param_space,
    scoring='accuracy',
    cv=KFold(n_splits=10),
    n_iter=50,  # Number of iterations
    verbose=3,
    random_state=7  # Set a random state for reproducibility
)

# Fit the Bayesian Optimization search on the training data
bayes_search.fit(X_train, Y_train)

# Print the best parameters
print("Best: %f using %s" % (bayes_search.best_score_, bayes_search.best_params_))


Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.990 total time=   0.5s
[CV 2/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=1.000 total time=   0.4s
[CV 3/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.949 total time=   0.4s
[CV 4/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.949 total time=   0.4s
[CV 5/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.960 total time=   0.5s
[CV 6/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.949 total time=   0.3s
[CV 7/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.969 total time=   0.4s
[CV 8/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.959 total time=   0.4s
[CV 9/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.959 total time=   0.6s
[CV 10/10] END C=989, gamma=0.8586335241540766, kernel=rbf;, score=0.949 total time=   0.4s
Fitting 10 folds for each of



Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 2/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 3/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 4/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.960 total time=   0.1s
[CV 5/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 6/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.949 total time=   0.1s
[CV 7/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.969 total time=   0.1s
[CV 8/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 9/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 10/10] END C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END C=262, gamma=0.9976177120880707, kernel=rbf;, score=0.



Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 2/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 3/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 4/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.960 total time=   0.1s
[CV 5/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 6/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.939 total time=   0.1s
[CV 7/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.969 total time=   0.1s
[CV 8/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 9/10] END ..C=841, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 10/10] END .C=841, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END C=596, gamma=0.0002989195742712073, kernel=rbf;, score



Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 2/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 3/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 4/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.960 total time=   0.1s
[CV 5/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.980 total time=   0.1s
[CV 6/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.949 total time=   0.1s
[CV 7/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.969 total time=   0.1s
[CV 8/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
[CV 9/10] END .C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.1s
[CV 10/10] END C=1000, gamma=0.0001, kernel=rbf;, score=0.990 total time=   0.1s
Fitting 10 folds for each of 1 candidates, totalling 10 fits
[CV 1/10] END C=464, gamma=0.00012349373610759653, kernel=rbf;, scor

## Support Vector Machines

In [11]:
# model_SVC = SVC(C=500,gamma=0.01, kernel='rbf')
model_SVC = SVC(C=891,gamma=0.00078, kernel='rbf')

model_SVC.fit(X_train,Y_train)

predictions=model_SVC.predict(X_test)

print("Results:")
print("Accuracy:", accuracy_score(Y_test, predictions))
print("Confusion Matrix:\n", confusion_matrix(Y_test, predictions))
print("Classification Report:\n", classification_report(Y_test, predictions))

Results:
Accuracy: 0.9952718676122931
Confusion Matrix:
 [[252   1]
 [  1 169]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       253
           1       0.99      0.99      0.99       170

    accuracy                           1.00       423
   macro avg       1.00      1.00      1.00       423
weighted avg       1.00      1.00      1.00       423



## Random Forest

In [12]:
from skopt import BayesSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Assuming X_train, X_test, Y_train, Y_test are already defined

# Define the parameter search space
param_space = {
    'n_estimators': (10, 200),
    'max_depth': (1, 20),
    'min_samples_split': (2, 20)
    # 'min_samples_leaf': (1, 20),
    # 'max_features': (0.1, 1.0)
}

# Create a Random Forest classifier
rf_classifier = RandomForestClassifier()

# Use StratifiedKFold for k-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Use BayesSearchCV for Bayesian Optimization
opt_rf = BayesSearchCV(
    rf_classifier,
    param_space,
    n_iter=50,  # adjust the number of iterations as needed
    cv=kf,  # StratifiedKFold for better handling of imbalanced classes
    n_jobs=-1,  # use all available cores for parallelization
    random_state=42
)

# Fit the optimizer on the training data
opt_rf.fit(X_train, Y_train)

# Print the best parameters found by Bayesian Optimization
print("Best Parameters:", opt_rf.best_params_)

# Make predictions on the test data using the best model
predictions = opt_rf.predict(X_test)

# Evaluate the accuracy
accuracy = accuracy_score(Y_test, predictions)
print("Test Accuracy: {:.2f}%".format(accuracy * 100))




Best Parameters: OrderedDict([('max_depth', 17), ('min_samples_split', 2), ('n_estimators', 200)])
Test Accuracy: 96.22%


In [14]:
best_params = {'max_depth': 17, 'min_samples_split': 2, 'n_estimators': 200}
model_RF = RandomForestClassifier(**best_params)

# Fit the model on the training data
model_RF.fit(X_train, Y_train)

# Make predictions on the test set
predictions = model_RF.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(Y_test, predictions)
conf_matrix = confusion_matrix(Y_test, predictions)
classification_rep = classification_report(Y_test, predictions)

# Print the results
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

Accuracy: 0.9574468085106383
Confusion Matrix:
 [[245   8]
 [ 10 160]]
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.96       253
           1       0.95      0.94      0.95       170

    accuracy                           0.96       423
   macro avg       0.96      0.95      0.96       423
weighted avg       0.96      0.96      0.96       423

