# SRNet data set creation

This script follows the guidelines given in ["Deep Residual Network for Steganalysis of Digital Images" by Mehdi Boroumand et al.](https://ieeexplore.ieee.org/document/8470101) to create a similar dataset from both [BOSS](http://agents.fel.cvut.cz/boss/) and [BOWS](http://bows2.ec-lille.fr/).

In [1]:
import os
import random
import numpy as np
from shutil import copyfile

In [2]:
# Input directories; judging by the names, each *_COVER_DIR holds cover images
# and the matching *_STEGO_DIR holds their stego versions.
BOSSBASE_COVER_DIR = 'BOSSbase_1.01_cover_256x256'
BOSSBASE_STEGO_DIR = 'BOSSbase_1.01_stego_0.4_256x256'
BOWS_COVER_DIR = 'BOWS2OrigEp3_cover_256x256'
BOWS_STEGO_DIR = 'BOWS2OrigEp3_stego_0.4_256x256'

# Name of the directory that receives the generated dataset.
OUTPUT_DIR = 'SRNet-Dataset-0.4'

# Seed NumPy's and the standard library's random generators with the same value
# so the random selection of images can be repeated.
SEED = 1
np.random.seed(SEED)
random.seed(SEED)

## Selection of the images

### Train set

In [3]:
# Training filenames, collected separately for each source dataset.
boss_train_set, bows_train_set = [], []

#### 4,000 pairs from BOSSbase

In [4]:
# os.listdir() returns entries in arbitrary order, so the listing is sorted;
# otherwise the seeded draw below could pick different files on another
# machine or filesystem even with the same SEED.
cover_bossbase_filenames = sorted(os.listdir(BOSSBASE_COVER_DIR))

# Draw 4000 distinct filenames (no replacement) and add them to the train set.
random_4000_boss_filenames = list(np.random.choice(cover_bossbase_filenames, size=4000, replace=False))
boss_train_set.extend(random_4000_boss_filenames)

In [5]:
# Remove the selected images. The difference is sorted because the iteration
# order of a set of strings changes between interpreter runs (hash
# randomization); an unsorted list would make any later seeded draw from it
# non-reproducible.
cover_bossbase_filenames = sorted(set(cover_bossbase_filenames) - set(random_4000_boss_filenames))
len(cover_bossbase_filenames)

6000

#### The whole BOWS dataset

In [6]:
# Every file listed in the BOWS cover directory goes into the train set.
cover_bows_filenames = os.listdir(BOWS_COVER_DIR)

bows_train_set += cover_bows_filenames
len(bows_train_set)

10000

### Validation

In [7]:
# 1,000 images for validation. The candidates are sorted before sampling so
# the seeded draw does not depend on the order of the incoming list (which is
# hash-dependent when it was built from a set).
boss_val_set = list(np.random.choice(sorted(cover_bossbase_filenames), size=1000, replace=False))

# Remove the 1,000 validation images; sorting keeps the remainder in a
# deterministic order.
cover_bossbase_filenames = sorted(set(cover_bossbase_filenames) - set(boss_val_set))
len(cover_bossbase_filenames)

5000

### Test

In [8]:
# Whatever is still left in cover_bossbase_filenames becomes the test set
# (this binds a second name to the same list, it does not copy it).
boss_test_set = cover_bossbase_filenames
len(boss_test_set)

5000

In [9]:
# Sanity check: no BOSSbase filename may appear in more than one split.
assert not (set(boss_test_set) & set(boss_val_set) & set(boss_train_set))
assert set(boss_test_set).isdisjoint(boss_val_set)
assert set(boss_test_set).isdisjoint(boss_train_set)
assert set(boss_val_set).isdisjoint(boss_train_set)

## Saving the images

In [10]:
# Build the output tree: OUTPUT_DIR/<set>/<label> for every set and both
# label folders ('0' and '1'). Refuse to run if OUTPUT_DIR is already there.
assert not os.path.isdir(OUTPUT_DIR)
os.mkdir(OUTPUT_DIR)

sets = ['train', 'val', 'test']
for set_name in sets:
    set_dir = os.path.join(OUTPUT_DIR, set_name)
    os.mkdir(set_dir)
    for label in ('0', '1'):
        os.mkdir(os.path.join(set_dir, label))

In [11]:
# Copy the images into their respective folders
def copy_images_stego_cover_repetition_in_set(filenames, set_name, cover_dir, stego_dir, prefix=''):
    """Copy every cover/stego image pair into the label folders of one set.

    For each name in ``filenames`` the file from ``cover_dir`` is copied to
    ``OUTPUT_DIR/<set_name>/0`` and the file with the same name from
    ``stego_dir`` is copied to ``OUTPUT_DIR/<set_name>/1``.

    Args:
        filenames: Iterable of image filenames present in both source dirs.
        set_name: Name of the set sub-folder (e.g. 'train', 'val', 'test').
        cover_dir: Directory the cover images are read from.
        stego_dir: Directory the stego images are read from.
        prefix: Optional string prepended to each output filename. Use it to
            keep files from different source datasets apart when they are
            copied into the same set. Defaults to '' (names unchanged).

    Raises:
        FileExistsError: If a destination file already exists. copyfile()
            would otherwise overwrite it silently, so two source datasets
            sharing filenames would replace each other's images.
            NOTE(review): whether the BOSS and BOWS filenames actually
            overlap cannot be seen from this file - confirm.
    """
    for image_name in filenames:
        output_name = prefix + image_name

        # Cover first, then stego, matching label folders '0' and '1'.
        for source_dir, label in ((cover_dir, '0'), (stego_dir, '1')):
            destination = os.path.join(OUTPUT_DIR, set_name, label, output_name)
            if os.path.exists(destination):
                raise FileExistsError(
                    'Refusing to overwrite {!r}: an image with this name was '
                    'already copied into the {!r} set (pass a distinct '
                    'prefix to disambiguate).'.format(destination, set_name))
            copyfile(os.path.join(source_dir, image_name), destination)
        
# One entry per copy job, processed in order:
# (filenames, set name, cover directory, stego directory).
for split_files, split_name, split_cover_dir, split_stego_dir in (
    (boss_train_set, 'train', BOSSBASE_COVER_DIR, BOSSBASE_STEGO_DIR),
    (bows_train_set, 'train', BOWS_COVER_DIR, BOWS_STEGO_DIR),
    (boss_val_set, 'val', BOSSBASE_COVER_DIR, BOSSBASE_STEGO_DIR),
    (boss_test_set, 'test', BOSSBASE_COVER_DIR, BOSSBASE_STEGO_DIR),
):
    copy_images_stego_cover_repetition_in_set(split_files, split_name, split_cover_dir, split_stego_dir)