Libraries

In [None]:
!pip install stegano                     # steganalysis library
!pip install -q efficientnet_pytorch     # Convolutional Neural Net from Google Research

In [None]:
import stegano
from stegano import lsb

# System
import cv2
import os, os.path
from PIL import Image              # from RBG to YCbCr

# Basics
import pandas as pd
import numpy as np
from numpy import pi                # for DCT
from numpy import r_                # for DCT
import scipy                        # for cosine similarity
from scipy import fftpack           # for DCT
import random
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg    # to check images
%matplotlib inline
from tqdm.notebook import tqdm      # beautiful progression bar

# SKlearn
from sklearn.model_selection import KFold
from sklearn import metrics

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch import FloatTensor, LongTensor
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F

# Data Augmentation for Image Preprocessing
from albumentations import (ToFloat, Normalize, VerticalFlip, HorizontalFlip, Compose, Resize,
                            RandomBrightness, RandomContrast, HueSaturationValue, Blur, GaussNoise)
from albumentations.pytorch import ToTensorV2, ToTensor
from efficientnet_pytorch import EfficientNet
from torchvision.models import resnet34

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Show the contents of the attached input folder (os.listdir already returns a list)
print(os.listdir("../input/v2-effnet-epoch-6-auc-08023"))

In [None]:
# Embed a short text message into the sample picture with the LSB method
# and write the result next to the notebook
msg_to_hide = "something!!!!!!"
secret = lsb.hide("../input/suki-image/capture27.png", msg_to_hide, auto_convert_rgb=True)
secret.save("./SukiSecret.png")

# Read the message back from the saved file to confirm the round trip
print(lsb.reveal("./SukiSecret.png"))

# Load both versions as arrays; they are reused by the comparison further down
original = mpimg.imread('../input/suki-image/capture27.png')
altered = mpimg.imread('./SukiSecret.png')

# Draw them side by side (no apparent difference, but WE KNOW the text is there.)
f, ax = plt.subplots(1, 2, figsize=(14,5))
original_plot = ax[0].imshow(original)
altered_plot = ax[1].imshow(altered)


We can check how similar the images are by subtracting one matrix from the other.

Let's compute the **similarity** of the above 2 images, to see if there is any hidden information in the altered image:

In [None]:
# From image to array
# (flatten each image into a 1-D vector so the two can be compared element by element;
#  float64 avoids unsigned wrap-around and keeps the sum precise)
original_vector = np.asarray(original, dtype=np.float64).ravel()
altered_vector = np.asarray(altered, dtype=np.float64).ravel()

print('Original shape:', original_vector.shape, '\n' +
      'Altered shape:', altered_vector.shape)


# The distance is the sum of ABSOLUTE element-wise differences: with a plain
# signed sum, positive and negative deviations cancel each other out and two
# different images can end up with a distance of (almost) 0.

# Distance between the original image and itself (should be 0, because they are identical)
dist1 = np.sum(np.abs(original_vector - original_vector))
print('Dist1:', dist1)

# Distance between the original image and altered image
dist2 = np.sum(np.abs(original_vector - altered_vector))
print('Dist2:', dist2)


There are 75k files in each of the Cover, JMiPOD, JUNIWARD and UERD folders, and 5k files in Test. We can't read all the image arrays at once, because the available RAM is not enough, so we only collect the file paths and load images on demand.

In [None]:
# ---- STATICS ----
base_path = '../input/alaska2-image-steganalysis'

def read_images_path(dir_name='Cover', test = False):
    '''Return the complete paths of all files in one dataset folder.

    dir_name: sub-folder of `base_path` to list (e.g. 'Cover', 'JMiPOD').
    test: when True the 'Test' folder is listed, whatever `dir_name` is.

    Returns a pd.Series of paths (<base_path>/<folder>/0001.jpg, ...),
    sorted by file name.'''

    # The test flag selects the folder for BOTH the listing and the path prefix,
    # so file names from 'Test' are never combined with another folder name.
    folder = 'Test' if test else dir_name

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # deterministic, so series built from folders holding the same file names
    # line up position by position.
    file_names = sorted(os.listdir(base_path + '/' + folder))

    # Create the entire path (a plain list also handles an empty folder cleanly)
    paths = [base_path + '/' + folder + '/' + name for name in file_names]

    return pd.Series(paths, dtype=object)

In [None]:
# Collect the file paths of every dataset folder (only paths, no pixel data is loaded)
cover_paths = read_images_path(dir_name='Cover')
jmipod_paths = read_images_path(dir_name='JMiPOD')
juniward_paths = read_images_path(dir_name='JUNIWARD')
uerd_paths = read_images_path(dir_name='UERD')
test_paths = read_images_path(dir_name='Test', test=True)

### Show some Images

In [None]:
def show15(title = "Default", paths = None, n = 15):
    '''Plot the first `n` images of `paths` on a grid that is 5 columns wide.

    title: figure super-title.
    paths: sliceable collection of image file paths; defaults to the
           global `cover_paths`.
    n: number of images to show (default 15, i.e. a 3 x 5 grid).'''
    if paths is None:
        paths = cover_paths

    n = max(0, n)                 # a negative n would otherwise slice from the end
    selected = list(paths[:n])

    n_cols = 5
    n_rows = max(1, -(-n // n_cols))   # ceiling division, at least one row

    plt.figure(figsize=(16,9))
    plt.suptitle(title, fontsize = 16)

    for k, path in enumerate(selected):
        plt.subplot(n_rows, n_cols, k+1)
        plt.imshow(mpimg.imread(path))
        plt.axis('off')

In [None]:
# Preview a grid of cover images
show15("15 Original Images")

### Images shape, size, data type
* all images are 512 x 512 x 3
* all images contain 786,432 values (512 × 512 × 3)
* all images are uint8 type

In [None]:
# Load one cover image and report its array shape, number of values and dtype
image_sample = mpimg.imread(cover_paths[0])

for label, value in (('Image sample shape:', image_sample.shape),
                     ('Image sample size:', image_sample.size),
                     ('Image sample data type:', image_sample.dtype)):
    print(label, value)


Three different algorithms are applied to the original (cover) images to embed hidden information into them:
* JMiPOD 
* JUNIWARD
* UERD

> Each original image has its encoded counterparts stored under the same file name in the corresponding algorithm folder.

In [None]:
def show_images_alg(n = 3, title="Default"):
    '''Plot the first `n` original images next to their three encoded versions.

    Every row shows one position `index` of the global path series
    `cover_paths`, `jmipod_paths`, `juniward_paths` and `uerd_paths`.
    NOTE: a row only compares the same picture if those four series list
    the same file names in the same order.

    n: number of images (rows) to display
    title: figure super-title'''

    # squeeze=False keeps `ax` two-dimensional even when n == 1, so the
    # ax[row, column] indexing below works for any number of rows.
    f, ax = plt.subplots(n, 4, figsize=(16, 7), squeeze=False)
    plt.suptitle(title, fontsize = 16)

    # (column header, path series) for each of the four columns
    columns = (('Original', cover_paths),
               ('IPod', jmipod_paths),
               ('Juni', juniward_paths),
               ('Uerd', uerd_paths))

    for index in range(n):
        for col, (label, paths) in enumerate(columns):
            ax[index, col].imshow(mpimg.imread(paths[index]))

            # Column headers go on the first row only
            if index == 0:
                ax[index, col].set_title(label, fontsize=12)

In [None]:
# Compare the first three originals with their encoded versions
show_images_alg(3, "Algorithm Difference")




 Visualizing the data: YCbCr channels

In [None]:
def show_ycbcr_images(n = 3, title = "Default"):
    '''Shows n images as: original RGB, YCbCr and Y, Cb, Cr channels split.

    The images are the first `n` entries of the global `cover_paths`.

    n: number of images (rows) to display
    title: figure super-title'''

    # 5 columns: original image, YCbCr image, Y, Cb, Cr (separate channels).
    # squeeze=False keeps `ax` two-dimensional even when n == 1.
    fig, ax = plt.subplots(n, 5, figsize=(16, 7), squeeze=False)
    plt.suptitle(title, fontsize = 16)

    column_titles = ('Original', 'YCbCr', 'Y', 'Cb', 'Cr')

    for index, path in enumerate(cover_paths[:n]):
        # The context manager closes the image file once the row is drawn
        with Image.open(path) as original_image:
            # Convert and split into the three separate channels
            ycbcr_image = original_image.convert('YCbCr')
            panels = (original_image, ycbcr_image) + tuple(ycbcr_image.split())

            # Plot
            for col, panel in enumerate(panels):
                ax[index, col].imshow(panel)

        # Add Title (first row only)
        if index == 0:
            for col, label in enumerate(column_titles):
                ax[index, col].set_title(label, fontsize=12)

In [None]:
# Show the colour-space decomposition for the first three images
show_ycbcr_images(3, "YCbCr Channels")

Visualize DCT Coefficients





In [None]:
# Pick one image as the running example; `image` is reused by the cells below
image = mpimg.imread(cover_paths[2])

# Show it on a square canvas without axis ticks
plt.figure(figsize=(6, 6))
plt.imshow(image)
plt.axis('off')
plt.title('Original Image', fontsize=16);

Create DCT Function:

In [None]:
# 2D DCT built from two 1-D passes
def dct2(a):
    """Return the orthonormal 2D Discrete Cosine Transform of `a`.

    The 1-D DCT is applied along axis 0 and then along axis 1; any further
    axes of `a` are left untouched.
    """
    first_pass = fftpack.dct(a, axis=0, norm='ortho')
    return fftpack.dct(first_pass, axis=1, norm='ortho')

# Blockwise DCT: the coefficients are written into a separate array with the
# same shape as the image (the image itself is not modified)
imsize = image.shape
dct = np.zeros(imsize)

# Walk over the image in steps of 8 along both axes and transform each 8x8 tile
for i in range(0, imsize[0], 8):
    for j in range(0, imsize[1], 8):
        block = image[i:i + 8, j:j + 8]
        dct[i:i + 8, j:j + 8] = dct2(block)

Look at an 8x8 block: original vs DCT coeff

In [None]:
# ---- STATICS ----
pos = 128   # can be changed

# imshow ignores cmap/vmin/vmax for RGB(A) input and clips float data to [0, 1],
# so a multi-channel coefficient array cannot be shown with the intended gray
# scaling. Average the channels first; the DCT is linear, so this equals the
# DCT of the channel-averaged (grayscale) image.
dct_gray = dct.mean(axis=2) if dct.ndim == 3 else dct

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Display original (cmap only takes effect if the image is single-channel)
ax1.imshow(image[pos:pos+8,pos:pos+8],cmap='gray')
ax1.set_title("An 8x8 block : Original Image", fontsize=16)

# Display the dct of that block; vmax at 1% of the largest coefficient keeps
# the small coefficients visible next to the much larger ones
ax2.imshow(dct_gray[pos:pos+8,pos:pos+8],cmap='gray',vmax= np.max(dct_gray)*0.01,vmin = 0, extent=[0,pi,pi,0])
ax2.set_title("An 8x8 DCT block", fontsize = 16);

Display ALL DCT blocks against the original image

In [None]:
# imshow ignores cmap/vmin/vmax for RGB(A) input and clips float data to [0, 1],
# so a multi-channel coefficient array cannot be shown with the intended gray
# scaling. Average the channels first; the DCT is linear, so this equals the
# DCT of the channel-averaged (grayscale) image.
dct_gray = dct.mean(axis=2) if dct.ndim == 3 else dct

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Original image
ax1.imshow(image);
ax1.set_title("Original Image", fontsize = 16);

# DCT Blocks (vmax at 1% of the largest coefficient so small values stay visible)
ax2.imshow(dct_gray,cmap='gray',vmax = np.max(dct_gray)*0.01,vmin = 0)
ax2.set_title("DCT blocks", fontsize = 14);