Code implementation of: **A. Alahmadi, M. Hussain, H. Aboalsamh, G. Muhammad, G. Bebis, and H. Mathkour, “Passive detection of image forgery using DCT and local binary pattern,” Signal, Image and Video Processing, vol. 11, no. 1, pp. 81–88, Jan. 2017. [Online]. Available: https://doi.org/10.1007/s11760-016-0899-0**

In [1]:
import cv2
import numpy as np
import pandas as pd
from scipy.fftpack import dct
from skimage.feature import local_binary_pattern
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import random
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Images are tiled into non-overlapping blocks of this width/height
split_width = split_height = 16

# Arguments passed to local_binary_pattern: P and R
P, R = 8, 1.0
P

8

In [3]:
def start_points(size, split_size, overlap=0):
    """Return the start offsets of the blocks that tile one image dimension.

    Blocks start every ``int(split_size * (1 - overlap))`` pixels. The last
    block always starts at ``size - split_size`` so that it ends exactly at
    the image border (it may therefore overlap the block before it).

    Parameters
    ----------
    size : int
        Length of the image along the dimension being split.
    split_size : int
        Length of one block along that dimension.
    overlap : float, optional
        Fraction of ``split_size`` shared by consecutive blocks (default 0).

    Returns
    -------
    list of int
        Strictly increasing start offsets, beginning with 0. When the image
        is not larger than one block the result is ``[0]``.

    Raises
    ------
    ValueError
        If the resulting stride is not positive (e.g. ``overlap >= 1``),
        which would otherwise never advance through the image.
    """
    stride = int(split_size * (1 - overlap))
    if stride <= 0:
        raise ValueError(
            "stride must be positive; got %d (split_size=%r, overlap=%r)"
            % (stride, split_size, overlap)
        )

    last_start = size - split_size
    if last_start <= 0:
        # A single block covers (or exceeds) the whole dimension: avoid
        # emitting a duplicate 0 or a negative offset.
        return [0]

    # Regular grid positions strictly before the border-aligned last block.
    points = list(range(0, last_start, stride))
    points.append(last_start)
    return points

In [4]:
def feature_extraction(path_to_folder, class_label):
    """Extract one feature vector per image in a folder.

    For every file that OpenCV can decode, the image is converted to YCrCb,
    channel index 1 is kept, and the channel is tiled into
    ``split_height`` x ``split_width`` blocks. Each block goes through
    ``local_binary_pattern`` and then a type-2 DCT along its last axis. The
    feature vector is the standard deviation, across all blocks, of every
    coefficient position (``split_height * split_width`` values, row-major).

    Results are appended to the module-level lists ``feature_vector``,
    ``label`` and ``dataframe_list``, which must exist before the call.

    Parameters
    ----------
    path_to_folder : str
        Folder whose files are processed.
    class_label : int
        Label stored alongside every image of that folder.

    Returns
    -------
    None
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and hence the seeded train/test split later on) reproducible.
    for file_name in sorted(os.listdir(path_to_folder)):
        path_to_img = os.path.join(path_to_folder, file_name)
        img = cv2.imread(path_to_img)

        # cv2.imread yields None for anything it cannot load; skip those.
        if img is None:
            continue

        # Convert to YCrCb and keep channel 1 only.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)[:, :, 1]
        img_h, img_w = img.shape

        # An image smaller than one block cannot be tiled into equally sized
        # blocks (stacking them below would fail), so it is skipped.
        if img_h < split_height or img_w < split_width:
            continue

        # Top-left corners of the blocks.
        X_points = start_points(img_w, split_width, 0.0)
        Y_points = start_points(img_h, split_height, 0.0)

        # LBP followed by DCT for each block.
        # NOTE(review): dct(..., axis=-1) is a 1-D transform along the last
        # axis only; if a 2-D block DCT is intended, a second pass along
        # axis 0 is required -- confirm against the referenced paper.
        dct_blocks = []
        for i in Y_points:
            for j in X_points:
                block = img[i:i + split_height, j:j + split_width]
                lbp = local_binary_pattern(block, P, R, method='default')
                dct_blocks.append(
                    dct(lbp, type=2, n=None, axis=-1, norm=None, overwrite_x=False)
                )

        # Shape: (number of blocks, block rows, block columns).
        dct_blocks_array = np.asarray(dct_blocks)

        # Std of each coefficient position across blocks, flattened row-major
        # (16 * 16 = 256 values with the default block size).
        img_std_list = list(np.std(dct_blocks_array, axis=0).ravel())

        feature_vector.append(img_std_list)
        label.append(class_label)
        dataframe_list.append([file_name, class_label] + img_std_list)

In [5]:
# Sanity check: display the configured block width.
split_width

16

In [6]:
# Accumulators filled by feature_extraction(); re-created here so that
# re-running this cell starts from empty lists.
feature_vector = []
label = []
# name_list=[]
dataframe_list = []

# CASIA V1.0 dataset
# Raw strings keep the Windows backslashes literal; sequences such as "\F" or
# "\d" in a normal string are invalid escapes and trigger a deprecation warning.
au_path = r"E:\FinalProject\learning\dataset\working\CASIA1\Au\Au"
tp_path1 = r"E:\FinalProject\learning\dataset\working\CASIA1\Modified Tp\Tp\CM"
tp_path2 = r"E:\FinalProject\learning\dataset\working\CASIA1\Modified Tp\Tp\Sp"
feature_extraction(au_path, 0)   # Au folder  -> label 0
feature_extraction(tp_path1, 1)  # Tp folders -> label 1
feature_extraction(tp_path2, 1)
dataframe_list
# Columbia dataset
# au_path="YOUR_PATH/Columbia_ImSpliceDataset/authentic"
# tp_path="YOUR_PATH/Columbia_ImSpliceDataset/spliced"
# feature_extraction(au_path, 0)
# feature_extraction(tp_path, 1)

# CASIA V2.0 Dataset
# au_path="YOUR_PATH/CASIA2.0_revised/Au"
# tp_path="YOUR_PATH/CASIA2.0_revised/Tp"
# feature_extraction(au_path, 0)
# feature_extraction(tp_path, 1)

[['Au_ani_0001.jpg',
  0,
  1511.498244137308,
  562.2086754751532,
  428.120626926512,
  296.6954813371801,
  288.3951324938695,
  296.99574039728725,
  303.1379661976303,
  272.89782342490474,
  259.0063576067342,
  265.31480103264386,
  283.3593289578702,
  289.33416011031585,
  295.3147103103316,
  348.2194446172797,
  360.44943162349193,
  482.44655511313067,
  1907.455585805822,
  462.25419997781756,
  422.21048994102597,
  321.17752722764556,
  340.08311796954723,
  447.5457845474281,
  406.28989704599564,
  317.9899665438415,
  266.85259384925513,
  273.2941811417044,
  304.3805605320305,
  295.62074080001537,
  243.0708924142944,
  254.93805938570145,
  278.65582526305064,
  387.04371722667673,
  1405.8906813994429,
  413.70703036397157,
  316.1936099343871,
  257.58890777432606,
  300.9676006876838,
  305.94067429570737,
  255.9238400206853,
  248.21183906730266,
  247.44090924332988,
  242.45052963893238,
  276.1705170056042,
  283.27801330276486,
  242.40052268986142,
  214

In [7]:
# Report the feature dimensionality and the number of samples / labels collected.
for caption, count in (
    ("Length/Dimension of features", len(feature_vector[0])),
    ("Length of feature vector", len(feature_vector)),
    ("length of label", len(label)),
):
    print(caption, count)

Length/Dimension of features 256
Length of feature vector 1721
length of label 1721


In [8]:
# One row per image: column 0 holds the file name, column 1 the class label,
# and the remaining columns the feature values.
df = pd.DataFrame(dataframe_list).rename(columns={0: "image_names"})
# df['label']=label #To add label column as well

df.head(8)

Unnamed: 0,image_names,1,2,3,4,5,6,7,8,9,...,248,249,250,251,252,253,254,255,256,257
0,Au_ani_0001.jpg,0,1511.498244,562.208675,428.120627,296.695481,288.395132,296.99574,303.137966,272.897823,...,31.712386,33.120184,33.865082,34.048007,35.898387,37.757225,37.820908,43.039056,46.838646,58.341715
1,Au_ani_0002.jpg,0,1451.277785,691.446614,671.068869,441.017046,343.455332,336.709893,316.581932,294.607045,...,39.191094,37.887527,37.466226,41.960587,42.005296,47.099761,43.918581,48.591678,47.437518,60.990844
2,Au_ani_0003.jpg,0,1565.297168,718.836708,584.140163,378.335969,328.301999,314.950926,297.939159,277.081906,...,40.593895,38.266854,40.099756,41.056545,42.724463,45.106531,44.87374,48.193475,54.333408,55.795184
3,Au_ani_0004.jpg,0,1874.033285,585.929965,523.680798,349.908835,327.243074,284.786578,280.14539,245.739112,...,36.146655,34.689502,35.612863,36.992492,35.183677,38.269135,44.657494,43.14577,43.083904,50.927619
4,Au_ani_0005.jpg,0,1492.020278,456.640748,319.275865,240.463716,213.742085,175.417136,175.931752,160.036146,...,28.014458,27.47726,30.570388,26.753329,28.701464,32.328405,30.421553,33.419605,32.91455,38.072796
5,Au_ani_0006.jpg,0,1702.514032,652.538447,547.661519,418.05947,335.882731,298.303967,301.812718,282.924279,...,37.290779,36.46566,35.001803,37.651362,40.545583,41.25295,44.21328,45.260921,52.02999,55.289388
6,Au_ani_0007.jpg,0,1269.380837,531.181109,465.940738,333.892035,291.603882,271.798626,273.700345,276.029888,...,32.589998,32.665905,35.236843,34.884946,35.458354,38.292347,37.369177,40.282418,47.103955,47.229385
7,Au_ani_0008.jpg,0,1247.180336,489.532898,458.393917,355.634368,302.010772,265.729242,263.725782,252.20555,...,36.171579,36.547793,37.458118,35.394169,36.767668,39.727125,41.27283,45.712413,53.031559,54.219319


In [9]:
scaler_norm = MinMaxScaler()
# Column 0 is the image name and column 1 the integer class label, so only
# columns 2 onward are features. Leaving the label out keeps it an integer
# and avoids writing floats into an int column.
# NOTE(review): the scaler is fitted on every sample before the train/test
# split done later in this notebook, so test-set statistics influence the
# scaling -- consider fitting on the training split only.
df.iloc[:, 2:] = scaler_norm.fit_transform(df.iloc[:, 2:].to_numpy())  # Normalising the feature values in dataframe.

  df.iloc[:,1:] = scaler_norm.fit_transform(df.iloc[:,1:].to_numpy()) # Normalising the values in dataframe.


In [10]:
# Output file for the extracted features (one name per dataset).
# path_csv="CASIA2_feature.csv"
path_csv = "CASIA1_feature.csv"
# path_csv="Columbia_feature.csv"

# index=True is the pandas default: the row index is written as the first,
# unnamed CSV column, which shifts every other column by one when read back.
df.to_csv(path_csv, index=True)

## Classification using the extracted features

In [11]:
#df=pd.read_csv('YOUR_PATH/___features.csv')
df = pd.read_csv("CASIA1_feature.csv")
array = df.values
# CSV layout: column 0 = saved row index, column 1 = image name,
# column 2 = class label, columns 3+ = features.
# df.values is an object array because of the string column, so the feature
# slice is cast to a real float matrix instead of being left as objects.
x_feature = array[:, 3:].astype('float64')
y_label = array[:, 2].astype('int')
print(x_feature.shape)
print(y_label.shape)

(1721, 256)
(1721,)


In [12]:
# Split the data
X_train,X_test,Y_train,Y_test=train_test_split(x_feature,y_label,test_size=0.20,random_state=7)
Y_train

array([0, 1, 0, ..., 0, 1, 0])

In [13]:
# RBF-kernel SVM with fixed C and gamma.
model_SVC = SVC(kernel='rbf', C=32, gamma=0.03125)

# 10-fold splitter for the optional cross-validation check (disabled below).
kfold = KFold(n_splits=10)
#cv_results=cross_val_score(model_SVC,X_train,Y_train,cv=kfold,scoring='accuracy')
#msg="%s %f (%f)" % ('Training Accuracy: ',cv_results.mean(),cv_results.std())
#print(msg)

In [17]:
# Train the SVM on the training split, then score it on the held-out split.
model_SVC = SVC(kernel='rbf', C=32, gamma=0.03125) #Can also try for GridSearch yourself
model_SVC.fit(X_train, Y_train)

predictions = model_SVC.predict(X_test)

# Accuracy, confusion matrix and per-class report, printed in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_test, predictions))

0.9681159420289855
[[173   9]
 [  2 161]]
              precision    recall  f1-score   support

           0       0.99      0.95      0.97       182
           1       0.95      0.99      0.97       163

    accuracy                           0.97       345
   macro avg       0.97      0.97      0.97       345
weighted avg       0.97      0.97      0.97       345

