In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, color, transform, util
import os

In [256]:
# Function to retrieve a folder of images
def get_images(folder="/content/covers"):
  images = {} # name: image
  for file in os.listdir(folder):
    name, ext = os.path.splitext(file)
    path = os.path.join(folder, file)
    img = io.imread(path)
    if len(img.shape) >= 3:
      img = color.rgb2gray(img)
    img = util.img_as_ubyte(img) # Normalize
    img = transform.resize(img, (1024,1024), preserve_range=True).astype(np.uint8)
    images[name] = img
  return images

In [257]:
# Retrieve steganographic and cover images and save them to cover_images and stego_images
img_loc = 'Images/'
cover_loc = img_loc + 'cover/'
stego_loc = img_loc + 'stego/'
cover_images = get_images(cover_loc)
stego_images = get_images(stego_loc)

In [459]:
stego_images.keys() == '0.1'

False

In [11]:
# Random functions made from previous projects

# Save image to file using spatial domain format (png)
def save_image(img, location="output.png"):
  location = "/content/" + location # ***REMOVE THIS IF YOU ARE NOT USING COLAB***
  io.imsave(location, img)
  print("Image saved as:", location)

# Print image to screen
#   - bw (Black & White Option)
#   - title (Title to print)
def display_image(img, bw=False, title=""):
  if(bw): plt.imshow(img, cmap=plt.cm.gray)
  else: plt.imshow(img)
  plt.title(title)
  plt.show()

def bitget(img, bit):
  return np.unpackbits(img)[:,bit]

def bitplane(img, plane):
  return img & plane

# Examples (testing)
# display_image(img, title="Color")
# display_image(img_bw, bw=True)
# img_bw

In [236]:
def image_frame(image_dict, stego=False):
    names = image_dict.keys()
    images = image_dict.values()
    data = {
        'name': list(names),
        'image': list(images),
    }
    df = pd.DataFrame(data)
    df['stego'] = stego
    return df

In [471]:
# Create a dataframe from the cover and stego images
df1 = image_frame(cover_images)
df1

Unnamed: 0,name,image,stego
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",False
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",False
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",False
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",False
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",False
...,...,...,...
95,cc649305,"[[130, 130, 130, 130, 129, 128, 127, 126, 127,...",False
96,cc650821,"[[65, 61, 57, 52, 50, 50, 52, 53, 50, 52, 53, ...",False
97,cc650853,"[[22, 24, 27, 25, 15, 6, 6, 12, 14, 17, 22, 25...",False
98,cc668221,"[[179, 179, 179, 179, 179, 177, 176, 176, 174,...",False


In [474]:
# Get an even split of embedded images of rates 0.1 and 0.4
df2 = image_frame(stego_images,stego=True)
df2 = df2.sort_values('name')[:int(len(df2)/2)]
df2

Unnamed: 0,name,image,stego
0,265701-37-0.1_stego,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",True
1,265701-37-0.4_stego,"[[202, 201, 203, 203, 202, 204, 204, 204, 204,...",True
2,266861-36-0.1_stego,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",True
3,266861-36-0.4_stego,"[[137, 147, 152, 157, 160, 167, 166, 168, 168,...",True
4,268021-98-0.1_stego,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",True
...,...,...,...
95,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",True
96,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",True
97,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",True
98,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",True


In [475]:
# Merge dataframes
df = pd.concat([df1, df2], ignore_index=True)
df

Unnamed: 0,name,image,stego
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",False
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",False
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",False
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",False
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",False
...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",True
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",True
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",True
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",True


In [476]:
# Drop potential duplicates
df.drop_duplicates('image')

Unnamed: 0,name,image,stego
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",False
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",False
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",False
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",False
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",False
...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",True
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",True
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",True
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",True


In [477]:
# Encode the Labels (stego: True or False) as 1 and 0 for binary classification
df['stego'] = df['stego'].astype(int)
df

Unnamed: 0,name,image,stego
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0
...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",1
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",1
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",1
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",1


In [478]:
# Extracting LSB for each image and asserting it as a feature
df['lsb'] = df['image'].apply(lambda x: (x & 1).flatten())
df

Unnamed: 0,name,image,stego,lsb
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0,"[0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, ..."
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0,"[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ..."
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0,"[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, ..."
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0,"[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ..."
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0,"[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ..."
...,...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",1,"[0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, ..."
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, ..."
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, ..."
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",1,"[0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, ..."


In [479]:
# Extracting a histograph using the LSB
df['hist'] = df['lsb'].apply(lambda x: np.histogram(x,bins=2)[0])
df

Unnamed: 0,name,image,stego,lsb,hist
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0,"[0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, ...","[524408, 524168]"
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0,"[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ...","[525335, 523241]"
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0,"[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, ...","[520165, 528411]"
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0,"[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ...","[518339, 530237]"
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0,"[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...","[524428, 524148]"
...,...,...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",1,"[0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, ...","[531661, 516915]"
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, ...","[525089, 523487]"
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, ...","[529639, 518937]"
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",1,"[0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, ...","[530425, 518151]"


In [527]:
from scipy.stats import chi2_contingency

In [480]:
df.columns
# About output: image, lsb, and hist are all features where stego (0 or 1) is our label

Index(['name', 'image', 'stego', 'lsb', 'hist'], dtype='object')

In [518]:
df.head()

Unnamed: 0,name,image,stego,lsb,hist
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0,"[0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, ...","[524408, 524168]"
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0,"[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ...","[525335, 523241]"
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0,"[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, ...","[520165, 528411]"
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0,"[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ...","[518339, 530237]"
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0,"[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...","[524428, 524148]"


In [585]:
from sklearn.model_selection import train_test_split

In [586]:
def test_model(model, X, y):
    # Split the features and labels to a training and testing set
    #   * Standard splits are 80-20 or 90-10; we use 90-10 for best fit of sample.
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.1, random_state=1, shuffle=True)

    # Train and test the classifier
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)


In [621]:
def create_features(df):
    X = []
    for i in range(len(df)):
        a = df.loc[i,'image'].flatten()
        b = df.loc[i,'lsb']
        c = df.loc[i,'hist']
        abc = np.concatenate((a,b,c))
        X.append(abc)
    return np.array(X)

def extract_features(image):
    a = image.flatten()
    b = (image & 1).flatten()
    c = np.histogram(image,bins=2)[0]
    abc = np.concatenate((a,b,c))
    return np.array(abc).reshape(1,-1)

In [604]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

In [622]:
X = create_features(df)
y = df['stego']

In [631]:
clf = RandomForestClassifier(n_estimators=100)
test_model(clf, X, y)

0.45

In [629]:
clf2 = KNeighborsClassifier()
test_model(clf2, X, y)

0.35

In [630]:
clf3 = LinearSVC(max_iter=10, random_state=1)
test_model(clf3, X, y)



0.45

In [633]:
# Using the X from the cell above -- y remains the same.
#   Still using Random Forests

test_model(clf3, X2, y)



0.6

In [576]:
# Which steganographic images were improperly classified?
for name,img in stego_images.items():
    out = clf.predict(chisq((img & 1).flatten()))
    if out == [0]: print(name)
    # print((img & 1).flatten())

ValueError: Expected 2D array, got 1D array instead:
array=[0.23098758 0.6307925 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

It appears the algorithm was unbiased when mis-classifying different embedding rates.
This means that our features are very inaccurate and we will need different features for proving any certifiable differences.

In [542]:
df.head()

Unnamed: 0,name,image,stego,lsb,hist
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0,"[0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, ...","[524408, 524168]"
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0,"[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ...","[525335, 523241]"
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0,"[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, ...","[520165, 528411]"
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0,"[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ...","[518339, 530237]"
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0,"[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...","[524428, 524148]"


In [544]:
# Chi-Squared Test as Features

def chisq(lsb):
    obs = np.bincount(lsb, minlength=2)
    total = len(lsb)
    exp = np.array([total/2, total/2])

    cst, p_val, _, _ = chi2_contingency([obs, exp])
    return cst,p_val

df['c_stat'] = df['lsb'].apply(
    lambda x: chisq(x)[0]
)
df['p_val'] = df['lsb'].apply(
    lambda x: chisq(x)[1]
)
df

Unnamed: 0,name,image,stego,lsb,hist,c_stat,p_val
0,265701,"[[202, 201, 203, 203, 202, 203, 204, 204, 205,...",0,"[0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, ...","[524408, 524168]",0.027010,8.694579e-01
1,266861,"[[137, 146, 152, 157, 160, 167, 166, 168, 168,...",0,"[1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ...","[525335, 523241]",2.086863,1.485709e-01
2,268021,"[[221, 222, 222, 224, 223, 224, 224, 225, 224,...",0,"[1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, ...","[520165, 528411]",32.408040,1.249683e-08
3,269181,"[[43, 60, 78, 89, 104, 120, 125, 119, 122, 125...",0,"[1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, ...","[518339, 530237]",67.481695,2.126523e-16
4,274401,"[[145, 144, 141, 136, 118, 65, 55, 48, 60, 118...",0,"[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, ...","[524428, 524148]",0.036852,8.477669e-01
...,...,...,...,...,...,...,...
195,650853-26-0.4_stego,"[[40, 25, 32, 18, 41, 37, 27, 32, 36, 29, 45, ...",1,"[0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, ...","[531661, 516915]",103.662626,2.398737e-24
196,668221-13-0.1_stego,"[[193, 194, 195, 197, 197, 198, 196, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, ...","[525089, 523487]",1.220704,2.692226e-01
197,668221-13-0.4_stego,"[[193, 194, 195, 197, 197, 198, 197, 197, 194,...",1,"[1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, ...","[529639, 518937]",54.594508,1.481491e-13
198,668253-10-0.1_stego,"[[84, 86, 86, 83, 87, 86, 85, 89, 89, 88, 87, ...",1,"[0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, ...","[530425, 518151]",71.815082,2.363389e-17


In [566]:
X2 = df[['c_stat','p_val']]
X2

Unnamed: 0,c_stat,p_val
0,0.027010,8.694579e-01
1,2.086863,1.485709e-01
2,32.408040,1.249683e-08
3,67.481695,2.126523e-16
4,0.036852,8.477669e-01
...,...,...
195,103.662626,2.398737e-24
196,1.220704,2.692226e-01
197,54.594508,1.481491e-13
198,71.815082,2.363389e-17


In [584]:
# Neighborhood Test

def neighborhood(img):
    x,y = img.shape
    x_c = int(x /2)
    y_c = int(y / 2)

    # Count of 1s
    c = 0
 
    while (K):
        for i in range(N) :
            for j in range(N):
                c = 0
 
                # Counting all neighbouring 1s
                if (i > 0 and arr[i - 1][j] == 1):
                    c += 1
                if (j > 0 and arr[i][j - 1] == 1):
                    c += 1
                if (i > 0 and j > 0 and
                    arr[i - 1][j - 1] == 1):
                    c += 1
                if (i < N - 1 and arr[i + 1][j] == 1):
                    c += 1
                if (j < N - 1 and arr[i][j + 1] == 1):
                    c += 1
                if (i < N - 1 and j < N - 1
                    and arr[i + 1][j + 1] == 1):
                    c += 1
                if (i < N - 1 and j > 0
                    and arr[i + 1][j - 1] == 1):
                    c += 1
                if (i > 0 and j < N - 1
                    and arr[i - 1][j + 1] == 1):
                    c += 1
 
                # Comparing the number of neighbouring
                # 1s with given ranges
                if (arr[i][j] == 1) :
                    if (c >= range1a and c <= range1b):
                        b[i][j] = 1
                    else:
                        b[i][j] = 0
                 
                if (arr[i][j] == 0):
                    if (c >= range0a and c <= range0b):
                        b[i][j] = 1
                    else:
                        b[i][j] = 0
        K -= 1
 
        # Copying changes to the main matrix
        for k in range(N):
            for m in range( N):
                arr[k][m] = b[k][m]

return arr

img01 = df.loc[0, 'image']
neighborhood(img01)

(512, 512)

(1024, 1024)