# Coverage Dataset Preprocessing
<font size=3>Generate the bounding-box coordinates from the ground-truth mask.<br>
    We obtained the COVERAGE dataset [here](https://ieeexplore.ieee.org/document/7532339).<br>
    In addition, we divide all images into training and test sets according to the ratio of tampering factors.<br>
    The sizes of the training and test sets are the same as in RGB-N.<br>
    We provide text files for the training and test sets. See Readme.md for more details.</font>

In [1]:
import cv2
import scipy.io as io
import os
from glob import glob
from sklearn.model_selection import train_test_split

In [2]:
def bounding_box(image,mask,row_data):
    """Return the bounding box of the largest bright region of a mask.

    The mask is converted to grayscale, binarized at threshold 127 and its
    external contours are extracted. Only the contour with the most boundary
    points is kept; its axis-aligned bounding rectangle is encoded as the
    string ``'x1_y1_x2_y2'``, where ``(x1, y1)`` is the top-left corner and
    ``(x2, y2)`` is ``(x1 + width, y1 + height)``.

    Args:
        image: Unused; kept so existing callers keep working.
        mask: Mask array as loaded by ``cv2.imread`` (3-channel). A 2-D
            single-channel mask is accepted as well.
        row_data: Unused; kept so existing callers keep working.

    Returns:
        A list holding one ``'x1_y1_x2_y2'`` string, or an empty list when
        the mask contains no pixel above the threshold.

    Raises:
        ValueError: If ``mask`` is None (e.g. the mask file failed to load).
    """
    if mask is None:
        raise ValueError("mask is None; the mask image could not be loaded")

    # cvtColor rejects 2-D input, so only convert masks that carry channels.
    gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) if mask.ndim == 3 else mask
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x
    # return (contours, hierarchy); the last two items are valid for all.
    contours, _hierarchy = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
    print("Contours number：", len(contours))

    if len(contours) == 0:
        # Nothing above the threshold: report "no box" instead of failing
        # with an IndexError when picking the first contour.
        return []

    # With CHAIN_APPROX_NONE every boundary pixel is stored, so the contour
    # with the most points is the one with the longest outline. max() keeps
    # the first contour on ties, like a stable descending sort would.
    largest = max(contours, key=len)
    x, y, w, h = cv2.boundingRect(largest)
    return [str(x)+'_'+str(y)+'_'+str(x+w)+'_'+str(y+h)]

In [3]:
def load_image(image_path):
    """Read an image file with OpenCV and fail loudly when it cannot be read.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The decoded image array as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    # cv2.imread reports failure by returning None instead of raising, which
    # would only surface later as an obscure AttributeError in the caller.
    if not os.path.isfile(image_path):
        raise FileNotFoundError("Image file not found: %s" % image_path)
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError("OpenCV could not decode image: %s" % image_path)
    return img

In [4]:
# Export every forged image as 'TP_<id>_<x1>_<y1>_<x2>_<y2>_<factor>.png',
# encoding the mask's bounding box and the tampering factor in the file name.
data_path='../dataset/COVER_DATASET/'
mat_path = data_path+'label/TFlabel.mat'    # Tampering factors.
probe_path=data_path+'probe/'
data = io.loadmat(mat_path)
cls=data['TFlabel']
# loadmat returns MATLAB vectors as 2-D arrays by default; flatten so that
# indexing yields a scalar whether the labels are stored as Nx1 or 1xN
# (int() on a 1-element array is deprecated in recent NumPy releases).
tf_labels = cls.ravel()

os.makedirs(probe_path, exist_ok=True)

# Coverage data contains 100 images, numbered from 1.
for n in range(1, 101):
    img_id=str(n)
    mask_file = data_path+'mask/'+img_id+'forged.tif'
    image_file = data_path+'image/'+img_id+'t.tif'
    mask = load_image(mask_file)
    image = load_image(image_file)
    # A None result means the file could not be read; stop with the path
    # instead of failing on `.shape` below.
    if mask is None:
        raise FileNotFoundError("Cannot read mask: %s" % mask_file)
    if image is None:
        raise FileNotFoundError("Cannot read image: %s" % image_file)

    # A few images differ in size from their mask. They are only reported
    # here and still need to be processed separately.
    if mask.shape != image.shape:
        print(mask.shape)
        print(image.shape)
        print(img_id)

    box_list = bounding_box(image,mask, img_id)
    # Labels are stored from index 0 while image ids start from 1.
    cls_id=int(tf_labels[n-1])
    for box in box_list:
        name_str='TP_'+img_id+'_'+box+'_'+str(cls_id)
        out_file = probe_path+name_str.rstrip('_')+ ".png"
        # imwrite returns False on failure rather than raising.
        if not cv2.imwrite(out_file, image):
            raise IOError("Failed to write %s" % out_file)
print('\n\n=======================Done==============================!')


Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 2
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours number： 1
Contours num

In [5]:
# Split the exported 'TP_<id>_<x1>_<y1>_<x2>_<y2>_<factor>' images into a
# training and a test list and write one annotation file for each.
data_dir = probe_path  # FIXME
ext = 'TP*'
cls = ['1', '2', '3', '4', '5', '6']  # Tampering factors.
# glob returns files in arbitrary, filesystem-dependent order; sorting makes
# the seeded split below reproducible across machines.
filenames = sorted(glob(os.path.join(data_dir, ext)))
print('The number of images is %d'%len(filenames))
pic_name=[]
mani_type=[]
for file in filenames:
    stem = os.path.splitext(os.path.basename(file))[0]
    pic_name.append(stem)
    # The last '_'-separated field of the name is the tampering factor.
    mani_type.append(stem.split("_")[-1])

print("len pic_name: %d"%len(pic_name))
print("len mani_type: %d \n"%len(mani_type))
print('=======Split train and test set========')
# Stratify on the tampering factor so both subsets keep the same class
# ratio. Stratification needs at least two samples of every class, so fall
# back to a plain shuffled split when some class is too small.
label_counts = [mani_type.count(label) for label in set(mani_type)]
stratify_labels = mani_type if label_counts and min(label_counts) >= 2 else None
pic_name_train, pic_name_test, mani_type_train, mani_type_test = train_test_split(
    pic_name, mani_type, test_size=0.25, random_state=0, stratify=stratify_labels)

print("train set number: %d"%len(pic_name_train))
print("test set number: %d"%len(pic_name_test))


def _write_split(list_path, names, allowed_types):
    """Write one '<name> <x1> <y1> <x2> <y2> tamper' line per accepted image.

    Names whose last '_'-separated field is not in ``allowed_types`` are
    skipped. The box coordinates are the fields between the image id and
    the tampering factor.
    """
    with open(list_path, 'w') as f:
        for pic in names:
            content = pic.split("_")
            if content[-1] in allowed_types:
                f.write('%s %s %s\n' % (pic, ' '.join(content[2:-1]), 'tamper'))


_write_split(data_path+'cover_train_single.txt', pic_name_train, cls)
_write_split(data_path+'cover_test_single.txt', pic_name_test, cls)

print('=============Split over==============')


The number of images is 100
len pic_name: 100
len mani_type: 100 

train set number: 75
test set number: 25
