In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
input_tree = os.walk('/kaggle/input')
for dirname, _, filenames in input_tree:
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
"""
Project Title: Image Steganalysis to detect secret data based on similarity-probability prediction sourced from an RGB-triad scorer

Headrunner Title: Detection based on RGB-triad scorer 
"""

"""
Key employed libraries:

os | Miscellaneous operating system interfaces: https://docs.python.org/3/library/os.html
pandas | https://pandas.pydata.org/
math | Mathematical Functions | https://docs.python.org/3/library/math.html
NumPy | https://numpy.org/
imageio | Interface to read and write a wide range of image data| https://pypi.org/project/imageio/
cv2 | Open Source Computer Vision (OpenCV), opencv-python 4.2.0.34 | https://pypi.org/project/opencv-python/

"""

"""
Pros
(1) Brute force approach with sampling option
(2) Data reduction based on a scorer used as an estimate for detection
(3) Prediction with Machine Learning models for classification

"""

"""
Cons
(1) The test data set (Cover images) would need to be processed with the JMiPOD, JUNIWARD and UERD algorithms to obtain true values.
(2) AUC filtering could be more accurate for the testing task if point 1 were met, since both true values and predicted values could then be supplied as input.
(3) Data reduction is based on an RGB-triad scorer; the scorer can be excessively representative because of the real-pixel configuration; i.e. 1 condensed RGB triad (the mean of the image triads within an RGB image) would represent a 100x100 RGB image which was previously resized from a 512x512 one.

"""


"""
General notes of the coder:

(Note 1) The current code has been adapted to IaaS resources provided by Kaggle from 2 files created using on-premise resources to prototype.

(Note 2) The files are as follows: (1) ImageSimilary.py; (2) KaggleSolution2.py.

(Note 3) The current code does not pretend to process all the competition data.

"""

In [None]:
"""
Set default mode '/kaggle/working/'
"""
import shutil

# Optional clean-up step: remove the prototype folders under /kaggle/working/
# so the notebook can be re-run from a clean state.
print('\nHi there.')
default = input('\nDo you want to set the Kaggle resources to default mode? (y/n)')

# Fix: the upper-case check used to read `normTask`, a name that is only
# defined in a later cell, so any answer other than 'y' raised NameError
# on a fresh kernel. Both spellings are now checked against `default`.
if default in ('y', 'Y'):

    path1 = '/kaggle/working/output/'
    path2 = '/kaggle/working/input/'

    for stale_dir in (path1, path2):
        try:
            shutil.rmtree(stale_dir)
        except FileNotFoundError:  # directory does not exist
            pass

    print('\nDefault mode has been set.')

else:
    print('\nThanks for your time... Bye')

"""
Reference:

How do I remove/delete a folder that is not empty? (n.d.). Retrieved from
    https://stackoverflow.com/questions/303200/how-do-i-remove-delete-a-folder-that-is-not-empty/303225#303225

"""

In [None]:
"""
Load Input image files of the competition

Please manually add the data corresponding to the following competition:

ALASKA2 Image Steganalysis

"""

# Input directories

# Read-only competition folders, one per image family, all under the same root.
_competition_root = '/kaggle/input/alaska2-image-steganalysis/'
input_Cover = _competition_root + 'Cover/'
input_JMiPOD = _competition_root + 'JMiPOD/'
input_JUNIWARD = _competition_root + 'JUNIWARD/'
input_UERD = _competition_root + 'UERD/'
input_Test = _competition_root + 'Test/'
directoryListInput = [input_Cover, input_JMiPOD, input_JUNIWARD, input_UERD, input_Test]

# Number of Input Images and set of sample number:

def SampleFilesToCompete(directoryListInput, trainSample, testSample):
    """Select the first N file names from each input directory.

    Parameters
    ----------
    directoryListInput : list of str
        Directories to sample from.
    trainSample : int
        Number of file names taken from every directory except the Test one.
    testSample : int
        Number of file names taken from the directory equal to the
        hard-coded competition Test path.

    Returns
    -------
    (directoryListInput, KaggleInputList)
        The input list unchanged, and one list of sampled file names per
        directory, in the same order.

    Notes
    -----
    The listing is sorted before slicing. ``os.listdir`` returns names in
    arbitrary order, so an unsorted slice could select different names in
    each directory, and the selection could change between runs. Despite the
    printed banner, the selection is not random: it is the first N names in
    sorted order.
    """
    import os

    print('\n*******************************************************************************************************************')
    print('Sampling Method: Aleatory extraction')
    print('*******************************************************************************************************************')

    input_Test = '/kaggle/input/alaska2-image-steganalysis/Test/'
    KaggleInputList = []

    for i in directoryListInput:

        # Sorted so that every directory yields the same leading file names.
        listy = sorted(os.listdir(i))

        limit = testSample if i == input_Test else trainSample
        listySample = listy[0:limit]

        print('\nSourced Directory of Kaggle: ',i)
        print('\nExpected Number of Files to be extracted: \n',len(listySample), ' from ', len(listy), ' available instances')

        KaggleInputList.append(listySample)

    print('\n*******************************************************************************************************************')
    print('Sample Features:')
    print('*******************************************************************************************************************')
    print('\nTest-sample instances number: ', testSample)
    print('\nTrain-sample instances number: ', trainSample)
    print('\nDirectory Input List to compete sourced from Kaggle',directoryListInput)
    print('\nSample Files per directory',KaggleInputList)

    return directoryListInput, KaggleInputList

# Ask interactively how many images to sample, then pick the file names.
trainSample = int(input('\nPlease insert a sample-number of instances for training (0-1000) non-test-images:'))
testSample = int(input('\nPlease insert a sample-number of instances for testing (0-5000) test-images:'))

# Non-interactive alternative kept for reference:
#trainSample = 10
#testSample = 10
directoryListInput, KaggleInputList = SampleFilesToCompete(
    directoryListInput,
    trainSample,
    testSample,
)
              

"""
Create directories to prototype with input image files of the competition data (prototype inputs)
"""

# Required directories

# Working-copy folder tree for the sampled input images: two parent folders,
# then one leaf folder per image family.
path_generic1 = '/kaggle/working/input/'
path_generic2 = path_generic1 + 'inputtoprototype-alaska2imagesteganalysis/'

path_Cover = path_generic2 + 'Cover/'
path_JMiPOD = path_generic2 + 'JMiPOD/'
path_JUNIWARD = path_generic2 + 'JUNIWARD/'
path_UERD = path_generic2 + 'UERD/'
path_Test = path_generic2 + 'Test/'

# Leaf folders only, and the full list with the parents first.
directoryListG = [path_Cover, path_JMiPOD, path_JUNIWARD, path_UERD, path_Test]
directoryList = [path_generic1, path_generic2] + directoryListG

for _banner_line in (
    '\n*******************************************************************************************************************',
    'Required directories to create',
    '*******************************************************************************************************************',
):
    print(_banner_line)
print('\nDirectories to create:','\n', directoryList)

# Function directoryCreation(directoryList)

def directoryCreation(directoryList):
    """Create every directory in ``directoryList``, tolerating existing ones.

    Parameters
    ----------
    directoryList : list of str
        Directory paths to create.

    Returns
    -------
    None

    Raises
    ------
    FileExistsError
        If a path already exists but is not a directory.

    Notes
    -----
    ``os.makedirs(..., exist_ok=True)`` is used instead of ``os.mkdir`` in a
    try/except, so missing parent folders are created as well and the call
    no longer depends on the parents appearing earlier in the list.
    """
    import os

    print('\n*******************************************************************************************************************')
    print('Created or existing directories')
    print('*******************************************************************************************************************')

    for i in directoryList:

        os.makedirs(i, exist_ok=True)

        print('\n Created or existing directory: ',i)

    print('\nDirectories are created and available to upload data...')

    return

# Run

# Build the folder tree, then drop the two parent folders so that only the
# per-family leaf folders remain in directoryList.
directoryCreation(directoryList)
for _parent_dir in (path_generic1, path_generic2):
    directoryList.remove(_parent_dir)

"""
Load Input image files to prototype
"""

# Function

def SampleFilesExtraction(directoryListInput, directoryListOutput, fileList):
    """Copy the sampled files from each origin directory to its destination.

    Parameters
    ----------
    directoryListInput : list of str
        Origin directories.
    directoryListOutput : list of str
        Destination directories; entry ``i`` receives the files of origin ``i``.
    fileList : list of list of str
        File names to copy for each origin directory, in the same order.

    Returns
    -------
    None

    Notes
    -----
    Files are addressed by their full path instead of calling ``os.chdir``
    for every file, so the process working directory is left untouched.
    The former ``except FileExistsError`` branch was removed: ``shutil.copy``
    overwrites an existing destination file rather than raising, and the
    branch would have called ``os.removedirs`` on a file name.
    Based partially on "Copy all JPG file in a directory to another
    directory in Python?" (n.d.).
    """
    print('\n*******************************************************************************************************************')
    print('Kaggle images extraction for sampling')
    print('*******************************************************************************************************************')
    print('\n')

    import shutil, os

    for i in range(0, len(directoryListInput)):

        origin = directoryListInput[i]
        destination = directoryListOutput[i]

        print('Origin Directory: ', origin)
        print('Destination Directory: ', destination)

        for f in fileList[i]:

            shutil.copy(os.path.join(origin, f), destination)

            print('  Extracted File: ', f)
        print('\n')

    return

# Run
# The leaf folders created above are the copy destinations; directoryList is
# then rebound to the competition input folders before the copy starts.
directoryListOutput = directoryList
directoryList = directoryListInput
fileList = KaggleInputList
SampleFilesExtraction(directoryListInput, directoryListOutput, fileList)


"""
Reference:

Copy all JPG file in a directory to another directory in Python? (n.d.). Retrieved from
    https://stackoverflow.com/questions/11903037/copy-all-jpg-file-in-a-directory-to-another-directory-in-python

"""


In [None]:
"""
Create directories to prototype with input image files of the competition data (prototype outputs)
"""

# Required directories

# Folder tree for the normalized-image output: two parent folders, then one
# "<family>_ImageNormalized" leaf folder per image family.
path_generic1 = '/kaggle/working/output/'
path_generic2 = path_generic1 + 'prototypeOutput-alaska2imagesteganalysis/'

path_Cover = path_generic2 + 'Cover_ImageNormalized/'
path_JMiPOD = path_generic2 + 'JMiPOD_ImageNormalized/'
path_JUNIWARD = path_generic2 + 'JUNIWARD_ImageNormalized/'
path_UERD = path_generic2 + 'UERD_ImageNormalized/'
path_Test = path_generic2 + 'Test_ImageNormalized/'

# Leaf folders only, and the full list with the parents first.
directoryListG = [path_Cover, path_JMiPOD, path_JUNIWARD, path_UERD, path_Test]
directoryList = [path_generic1, path_generic2] + directoryListG

for _banner_line in (
    '\n*******************************************************************************************************************',
    'Required directories to create',
    '*******************************************************************************************************************',
):
    print(_banner_line)
print('\nDirectories to create:','\n', directoryList)

# Function directoryCreation(directoryList)

def directoryCreation(directoryList):
    """Create every directory in ``directoryList``, tolerating existing ones.

    Parameters
    ----------
    directoryList : list of str
        Directory paths to create.

    Returns
    -------
    None

    Raises
    ------
    FileExistsError
        If a path already exists but is not a directory.

    Notes
    -----
    ``os.makedirs(..., exist_ok=True)`` is used instead of ``os.mkdir`` in a
    try/except, so missing parent folders are created as well and the call
    no longer depends on the parents appearing earlier in the list.
    """
    import os

    print('\n*******************************************************************************************************************')
    print('Created or existing directories')
    print('*******************************************************************************************************************')

    for i in directoryList:

        os.makedirs(i, exist_ok=True)

        print('\n Created or existing directory: ',i)

    print('\nDirectories are created and available to upload data...')

    return

# Run

# Build the output folder tree, then keep only the per-family leaf folders
# in directoryList by removing the two parent folders.
directoryCreation(directoryList)
for _parent_dir in (path_generic1, path_generic2):
    directoryList.remove(_parent_dir)

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 03 14:47:16 2020

@author: Mauricio Azálgara Bedoya

Kaggle user account: mazalgarab

"""


"""
imageSimilarity.py
version adapted from the original version
"""

def image_path_input_list():
    """Return the prototype input folder of every image family.

    The folders are returned in the fixed order
    JMiPOD, JUNIWARD, UERD, Cover, Test.
    """
    root = '/kaggle/working/input/inputtoprototype-alaska2imagesteganalysis/'
    families = ('JMiPOD', 'JUNIWARD', 'UERD', 'Cover', 'Test')
    return [root + family + '/' for family in families]


def image_path(coverImageType):
    """Return the prototype input folder for one image family.

    Parameters
    ----------
    coverImageType : str
        One of 'JMiPOD', 'JUNIWARD', 'UERD', 'Cover' or 'Test'.

    Returns
    -------
    str
        The folder path, ending with '/'.

    Raises
    ------
    ValueError
        If ``coverImageType`` is not one of the known families. Previously an
        unknown value fell through every ``if`` and failed with an
        UnboundLocalError on the return statement.
    """
    coverImageTypeList = ['JMiPOD','JUNIWARD','UERD','Cover', 'Test']

    if coverImageType not in coverImageTypeList:
        raise ValueError(
            'Unknown coverImageType %r; expected one of %s'
            % (coverImageType, coverImageTypeList)
        )

    root = '/kaggle/working/input/inputtoprototype-alaska2imagesteganalysis/'
    return root + coverImageType + '/'


def image_list_bulk(image_path_input_list):
    """List the entries of every folder in ``image_path_input_list``.

    Returns one list of names (as given by ``os.listdir``) per folder, in
    the same order as the folders were supplied.
    """
    import os

    return [os.listdir(folder) for folder in image_path_input_list]


def reading_image_raw(filename, image_path):
    """Read one image file and print a few diagnostics about it.

    Parameters
    ----------
    filename : str
        File name, e.g. 'example-image.png'.
    image_path : str
        Folder prefix; it is concatenated directly with ``filename``.

    Returns
    -------
    The array returned by ``imageio.imread``.
    """
    import imageio

    # Reading an image file | (Raschka & Mirjalili, 2019, pp.532-536)
    img = imageio.imread(image_path + filename)

    # Each probe is evaluated just before its line is printed.
    diagnostics = (
        ('Image shape:', lambda arr: arr.shape),
        ('Number of channels:', lambda arr: arr.shape[2]),
        ('Image data type:', lambda arr: arr.dtype),
    )
    for label, probe in diagnostics:
        print(label, probe(img))

    # Small 2x2 pixel excerpt across all channels.
    print(img[100:102, 100:102, :])

    return img


def normalizing_image(imageio):
    """Downscale an image to 100x100 pixels and smooth it with a Gaussian blur.

    Parameters
    ----------
    imageio : array-like image accepted by ``cv2.resize``.

    Returns
    -------
    The resized and blurred image.

    References: OpenCV "Geometric Transformations of Images" (resize) and
    "Smoothing Images" (Gaussian blur); Rothman et al., 2018, pp. 470-479.
    """
    import cv2

    target_size = (100, 100)   # reduced from the original 512x512 pixels
    blur_kernel = (5, 5)

    shrunk = cv2.resize(imageio, target_size, interpolation=cv2.INTER_AREA)
    return cv2.GaussianBlur(shrunk, blur_kernel, 0)

def imageio_array_to_DataFrame(imageio):
    """Flatten a 3-D image array into a 2-D DataFrame.

    For an input of shape ``(si, sj, sk)`` the result has ``sj`` rows and
    ``si * sk`` columns: the ``sk`` channel values of ``imageio[i]`` occupy
    columns ``i*sk .. i*sk + sk - 1``. Columns are named with the strings
    '0', '1', ... in order.

    Parameters
    ----------
    imageio : array-like with exactly three dimensions.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If the input does not have exactly three dimensions.

    Notes
    -----
    The frame is built with a single transpose/reshape. The earlier version
    merged one 3-column frame per slice on the index, which re-copied the
    growing result on every iteration and only worked for 3 channels. For
    3-channel input the values and column names are unchanged.
    """
    import pandas as pd
    import numpy as np

    pixels = np.asarray(imageio)
    si, sj, sk = np.shape(pixels)

    # (si, sj, sk) -> (sj, si, sk) -> (sj, si*sk): row r holds, slice by
    # slice, the channel values found at position r of each slice.
    flat = pixels.transpose(1, 0, 2).reshape(sj, si * sk)
    colNames = [str(n) for n in range(si * sk)]

    return pd.DataFrame(flat, columns=colNames)


def image_resize_bulk_unitary(image_path_input_list,image_list_bulk,bulk):
    """Normalize every image of one family and save each result as a CSV file.

    Parameters
    ----------
    image_path_input_list : list of str
        Input folders, in the order JMiPOD, JUNIWARD, UERD, Cover, Test.
    image_list_bulk : list of list of str
        File names per input folder, in the same order.
    bulk : int
        Index (0-4) of the family to process.

    Returns
    -------
    None

    Notes
    -----
    Each image is read, normalized, flattened to a DataFrame and written
    without header or index to the matching "<family>_ImageNormalized"
    folder. The output name is the input name with its extension replaced by
    '.csv'; ``os.path.splitext`` is used so extensions of any length are
    handled, where the previous ``k[:-4]`` assumed exactly three letters.
    """
    import os

    # Output folders, in the same family order as the input list.
    root = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/'
    families = ('JMiPOD', 'JUNIWARD', 'UERD', 'Cover', 'Test')
    image_path_output_list = [root + family + '_ImageNormalized/' for family in families]

    i_input = image_path_input_list[bulk]
    image_path_output = image_path_output_list[bulk]

    for k in image_list_bulk[bulk]:

        imageio = reading_image_raw(k,i_input)
        image_array = normalizing_image(imageio)
        df = imageio_array_to_DataFrame(image_array)
        filename_output = os.path.splitext(k)[0] + '.csv'
        df.to_csv(image_path_output+filename_output,sep=',',index=False,header=False)

    return


"""
Run (Batching task)
"""
# Ask for confirmation before starting the batch normalization of the sample.
normTask = str(input('\nDo you want to start the image normalizing task for the image sample ? : (y/n)'))


if normTask in ('y', 'Y'):

    # NOTE: both names below are rebound from the helper functions to the
    # lists those functions return.
    image_path_input_list = image_path_input_list()
    image_list_bulk = image_list_bulk(image_path_input_list)

    # Family index: 0 JMiPOD, 1 JUNIWARD, 2 UERD, 3 Cover, 4 Test
    for bulk in range(5):
        image_resize_bulk_unitary(image_path_input_list,image_list_bulk,bulk)

    print('\nImage normalizing complete for the sample.')

else:
    print('\nThanks for your time... Bye')
    
"""
Bibliography:

Raschka, S., & Mirjalili, V. (2019). Python Machine Learning: Machine Learning
    and Deep Learning with Python, scikit-learn, and TensorFlow 2 (3rd ed.).
    Birmingham, United Kingdom: Packt Publishing Ltd.
    
Rothman, D., Lamons, M., Kumar, R., Nagaraja, A., Ziai, A., & Dixit, A. (2018).
    Python: Beginner's Guide to Artificial Intelligence: Build applications to
    intelligently interact with the world around you using Python (1st ed.).
    Birmingham, United Kingdom: Packt Publishing Ltd.
    
References:

Adobe. n.d. Resize and Crop images in Photoshop and Photoshop Elements. Retrieved from
    https://www.adobe.com/support/techdocs/331327.html    

Convert numpy.ndarray into imageio.core.util.Image. n.d. Retrieved from
    https://stackoverflow.com/questions/49269327/convert-numpy-ndarray-into-imageio-core-util-image/60888547#60888547    

OpenCV. n.d. Geometric Image Transformations. Retrieved from
    https://docs.opencv.org/trunk/da/d54/group__imgproc__transform.html#ga47a974309e9102f5f08231edc7e7529d

OpenCV. Geometric Transformations of Images. n.d. Retrieved from
    https://docs.opencv.org/trunk/da/d6e/tutorial_py_geometric_transformations.html

OpenCV. resize(). Retrieved from
    https://docs.opencv.org/trunk/da/d54/group__imgproc__transform.html#ga47a974309e9102f5f08231edc7e7529d

OpenCV. n.d. Smoothing Images. Retrieved from
    https://docs.opencv.org/master/d4/d13/tutorial_py_filtering.html

"""

In [None]:
"""
Re set initial sample size
"""
# Re-enter the sample sizes so the verification steps below can compare the
# folder contents against them.
_sample_rule = '\n************************************************************************************************************************************'
print(_sample_rule)
trainSample = int(input('\nPlease re insert the initial sample-number of instances for training (0-1000) non-test-images:'))
testSample = int(input('\nPlease re insert the initial sample-number of instances for testing (0-5000) test-images:'))
print(_sample_rule)

"""
Input Directory scheme and expected files
"""

# Competition Data
# Competition data
CD = '/kaggle/working/input/alaska2-image-steganalysis/'

# Competition data to prototype (sampled images per algorithm plus cover images)
CD_ToPrototype = '/kaggle/working/input/inputtoprototype-alaska2imagesteganalysis/'

# One sub-folder per image family under the prototype root.
CD_ToPrototype_Cover = CD_ToPrototype + 'Cover/'
CD_ToPrototype_JUNIWARD = CD_ToPrototype + 'JUNIWARD/'
CD_ToPrototype_UERD = CD_ToPrototype + 'UERD/'
CD_ToPrototype_JMiPOD = CD_ToPrototype + 'JMiPOD/'
CD_ToPrototype_Test = CD_ToPrototype + 'Test/'



def verification_Input(CD_ToPrototype_Cover,CD_ToPrototype_JUNIWARD,CD_ToPrototype_UERD,CD_ToPrototype_JMiPOD,CD_ToPrototype_Test,trainSample,testSample):
    """Print whether the sampled input folders hold the expected file counts.

    Parameters
    ----------
    CD_ToPrototype_Cover, CD_ToPrototype_JUNIWARD, CD_ToPrototype_UERD,
    CD_ToPrototype_JMiPOD : str
        Non-test folders; each one is expected to hold ``trainSample`` files.
    CD_ToPrototype_Test : str
        Test folder; expected to hold ``testSample`` files.
    trainSample, testSample : int
        Expected file counts.

    Returns
    -------
    None. The result is reported through printed messages only.

    Notes
    -----
    Every non-test folder is checked individually. The previous check
    compared the average count of the four folders with ``trainSample``, so
    unbalanced folders (e.g. 1, 3, 2, 2 for an expected 2) were reported as Ok.
    """
    import os

    print('\nVerification for Input 1: Image sample to prototype')

    InputList = [CD_ToPrototype_Cover, CD_ToPrototype_JUNIWARD, CD_ToPrototype_UERD, CD_ToPrototype_JMiPOD]

    filesNumber = []

    print('\n Process: Non-Test Image verification status')

    #### Non-Test Image verification status

    for i in InputList:

        count = len(os.listdir(i))
        filesNumber.append(count)
        print('\n   Directory: ',i)
        print('\n   Number of files:',count)

    if all(count == trainSample for count in filesNumber):
        print('\n   ||Input Non-Test-Image verification status: Ok')
    else:
        print('\n   ||Files verfication status: Required to be reviewed')

    #### Test Image verification status

    testCount = len(os.listdir(CD_ToPrototype_Test))

    print('\n Process: Test Image verification status')
    print('\n   Directory: ',CD_ToPrototype_Test)
    print('\n   Number of files:',testCount)

    if testCount == testSample:
        print('\n   ||Input Test-Image verfication status: Ok')
    else:
        print('\n   ||Files verfication status: Required to be reviewed')

    return
    
"""
Run
"""
# Run the input verification between two separator rules.
_input_check_rule = '\n=================================================================================================================================================='
print(_input_check_rule)
verification_Input(
    CD_ToPrototype_Cover,
    CD_ToPrototype_JUNIWARD,
    CD_ToPrototype_UERD,
    CD_ToPrototype_JMiPOD,
    CD_ToPrototype_Test,
    trainSample,
    testSample,
)
print(_input_check_rule)
    
"""
Output 1: Output Directory and expected files (/kaggle/working/)
"""

# /kaggle/working

# Working root, generic output folder and normalized-image root, each
# nested inside the previous one.
KW = '/kaggle/working/'
GD = KW + 'output/'
NI = GD + 'prototypeOutput-alaska2imagesteganalysis/'

# Normalized images: one "<family>_ImageNormalized" folder per image family.
NI_Cover = NI + 'Cover_ImageNormalized/'
NI_JUNIWARD = NI + 'JUNIWARD_ImageNormalized/'
NI_UERD = NI + 'UERD_ImageNormalized/'
NI_JMiPOD = NI + 'JMiPOD_ImageNormalized/'
NI_Test = NI + 'Test_ImageNormalized/'

### Files Verification


def verification_Output1(NI_Cover,NI_JUNIWARD,NI_UERD,NI_JMiPOD,NI_Test,trainSample,testSample):
    """Print whether the normalized-image folders hold the expected file counts.

    Parameters
    ----------
    NI_Cover, NI_JUNIWARD, NI_UERD, NI_JMiPOD : str
        Non-test folders; each one is expected to hold ``trainSample`` files.
    NI_Test : str
        Test folder; expected to hold ``testSample`` files.
    trainSample, testSample : int
        Expected file counts.

    Returns
    -------
    None. The result is reported through printed messages only.

    Notes
    -----
    Every non-test folder is checked individually. The previous check
    compared the average count of the four folders with ``trainSample``, so
    unbalanced folders were reported as Ok.
    """
    import os

    print('\nVerification for Output 2: Image instances after being normalized')

    OutputList = [NI_Cover, NI_JUNIWARD, NI_UERD, NI_JMiPOD]

    filesNumber = []

    print('\n Process: Non-File Test verification status')

    #### Non-File Test verification status

    for i in OutputList:

        count = len(os.listdir(i))
        filesNumber.append(count)
        print('\n   Directory: ',i)
        print('\n   Number of files:',count)

    if all(count == trainSample for count in filesNumber):
        print('\n   ||Input non-Test Files verfication status: Ok')
    else:
        print('\n   ||Files verfication status: Required to be reviewed')

    #### File Test verification status

    testCount = len(os.listdir(NI_Test))

    print('\n Process: File Test verification status')
    print('\n   Directory: ',NI_Test)
    print('\n   Number of files:',testCount)

    if testCount == testSample:
        print('\n   ||Input Test Files verfication status: Ok')
    else:
        print('\n   ||Files verfication status: Required to be reviewed')

    return

"""
Run
"""
# Run the output verification between two separator rules.
_output_check_rule = '\n=================================================================================================================================================='
print(_output_check_rule)
verification_Output1(
    NI_Cover,
    NI_JUNIWARD,
    NI_UERD,
    NI_JMiPOD,
    NI_Test,
    trainSample,
    testSample,
)
print(_output_check_rule)

"""
Output directory viewer
"""


# Prototype input folders, in the order JMiPOD, JUNIWARD, UERD, Cover, Test.
_viewer_families = ('JMiPOD', 'JUNIWARD', 'UERD', 'Cover', 'Test')
_viewer_input_root = '/kaggle/working/input/inputtoprototype-alaska2imagesteganalysis/'
a, b, c, d, e = [_viewer_input_root + _family + '/' for _family in _viewer_families]

#################################################################

# 'OutPut' folder and its per-family sub-folders.
# NOTE(review): the other cells write under '/kaggle/working/output/'; confirm
# whether this differently-cased 'OutPut' tree is still in use.
a1 = '/kaggle/working/OutPut'
a2, a3, a4, a5, a6 = [a1 + '/' + _family + '/' for _family in _viewer_families]

In [None]:
"""
Create directories to prototype with input image files of the competition data (prototype outputs | scores)
"""

# Function directoryCreation(directoryList)

def directoryCreation(directoryList):
    """Create every directory in ``directoryList``, tolerating existing ones.

    Parameters
    ----------
    directoryList : list of str
        Directory paths to create.

    Returns
    -------
    None

    Raises
    ------
    FileExistsError
        If a path already exists but is not a directory.

    Notes
    -----
    ``os.makedirs(..., exist_ok=True)`` is used instead of ``os.mkdir`` in a
    try/except, so missing parent folders are created as well and the call
    no longer depends on the parents appearing earlier in the list.
    """
    import os

    print('\n*******************************************************************************************************************')
    print('Created or existing directories')
    print('*******************************************************************************************************************')

    for i in directoryList:

        os.makedirs(i, exist_ok=True)

        print('\n Created or existing directory: ',i)

    print('\nDirectories are created and available to upload data...')

    return

# Run

## Required directories | Batch 1: Scores

# Folder tree for the score outputs: two parent folders, then one folder per
# score type.
path_generic1 = '/kaggle/working/output/'
path_generic2 = '/kaggle/working/output/scores/'

path_ImageSimilarity = '/kaggle/working/output/scores/ImageSimilarity/'
path_ImageSimilarityScore = '/kaggle/working/output/scores/ImageSimilarityScore/'
path_PixelDeviationScore = '/kaggle/working/output/scores/PixelDeviationScore/'
path_Fusion = '/kaggle/working/output/scores/Fusion/'

directoryList1 = [path_generic1]+[path_generic2]+[path_ImageSimilarity]+[path_ImageSimilarityScore]+[path_PixelDeviationScore]+[path_Fusion]
# NOTE(review): path_Fusion is not included in directoryListG1; confirm
# whether that omission is intentional.
directoryListG1 = [path_ImageSimilarity]+[path_ImageSimilarityScore]+[path_PixelDeviationScore]

print('\n*******************************************************************************************************************')
print('Required directories to create | Batch 1: Scores')
print('*******************************************************************************************************************')
# Fix: report the list that is actually created below. The previous code
# printed `directoryList`, a variable left over from an earlier cell.
print('\nDirectories to create:','\n', directoryList1)

directoryCreation(directoryList1)
directoryList1.remove(path_generic1)
directoryList1.remove(path_generic2)

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 08 13:38:00 2020

@author: Mauricio Azálgara Bedoya

Kaggle user account: mazalgarab

"""

"""
KaggleSolution2.py
Version adapted from the original version
"""


def imageio_path_input_list():
    """Return the normalized-image folder of every non-test image family.

    The folders are returned in the fixed order JMiPOD, JUNIWARD, UERD, Cover.
    """
    root = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/'
    families = ('JMiPOD', 'JUNIWARD', 'UERD', 'Cover')
    return [root + family + '_ImageNormalized/' for family in families]


def imageioFile_path(coverImageType):
    """Return the normalized-image folder for one image family.

    Parameters
    ----------
    coverImageType : str
        One of 'JMiPOD', 'JUNIWARD', 'UERD', 'Cover' or 'Test'.

    Returns
    -------
    str
        The folder path, ending with '/'.

    Raises
    ------
    ValueError
        If ``coverImageType`` is not one of the known families. Previously an
        unknown value fell through every ``if`` and failed with an
        UnboundLocalError on the return statement.
    """
    coverImageTypeList = ['JMiPOD','JUNIWARD','UERD','Cover', 'Test']

    if coverImageType not in coverImageTypeList:
        raise ValueError(
            'Unknown coverImageType %r; expected one of %s'
            % (coverImageType, coverImageTypeList)
        )

    root = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/'
    return root + coverImageType + '_ImageNormalized/'


def imageio_list_bulk(imageioFile_path):
    """Return the entry names of the folder ``imageioFile_path`` as a new list.

    The names come from ``os.listdir`` and are returned in the order it
    yields them.
    """
    import os

    return list(os.listdir(imageioFile_path))


def PixelDeviationScore(start,termination,joinBaseField):
    """Write a relative pixel-deviation score file for each Cover image of a batch.

    For every file name in the slice ``[start:termination]`` of the Cover
    directory listing, the same-named JMiPOD, JUNIWARD and UERD CSV files are
    read and the element-wise score

        (|Cover - JMiPOD| + |Cover - JUNIWARD| + |Cover - UERD|) / (3 * Cover)

    is computed. The divisor is zero wherever the Cover value is zero.

    Args:
        start: First index (inclusive) into the Cover file listing.
        termination: Last index (exclusive) into the Cover file listing.
        joinBaseField: When equal to ``True`` the Cover frame is merged in
            front of the score frame before writing; otherwise only the score
            frame is written.

    Returns:
        None. One header-less CSV per processed image is written to the
        PixelDeviationScore output directory.
    """
    import pandas as pd

    cover_dir = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Cover_ImageNormalized/'
    stego_dirs = (
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JMiPOD_ImageNormalized/',
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JUNIWARD_ImageNormalized/',
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/UERD_ImageNormalized/',
    )
    score_dir = '/kaggle/working/output/scores/PixelDeviationScore/'

    # Restrict the Cover listing to the requested batch.
    batch_files = imageio_list_bulk(cover_dir)[start:termination]

    for file_name in batch_files:

        cover = pd.read_csv(cover_dir + file_name, index_col=None, header=None)
        stego_frames = [
            pd.read_csv(stego_dir + file_name, index_col=None, header=None)
            for stego_dir in stego_dirs
        ]

        # Accumulate the absolute deviations in the same order as the stego directories.
        deviation_sum = abs(cover - stego_frames[0])
        for stego in stego_frames[1:]:
            deviation_sum = deviation_sum + abs(cover - stego)

        score = deviation_sum / (3 * cover)

        # Optionally prepend the Cover values (joined on the row index).
        if joinBaseField == True:
            result = pd.merge(cover, score, left_index=True, right_index=True)
        else:
            result = score

        result.to_csv(score_dir + file_name, sep=',', index=False, header=False)
        print('File created (Pixel Deviation Score): '  ,file_name)

    return


def round_up(n, decimals=0):
    """Round ``n`` up to ``decimals`` decimal places, tolerating NaN and infinity.

    Sourced (Amos, n.d.) and
    (Solving ValueError: cannot convert float NaN to integer, n.d.).

    Args:
        n: Number to round up.
        decimals: Number of decimal places to keep.

    Returns:
        The ceiling of ``n`` at the requested precision; ``0`` when the
        ceiling raises ValueError (NaN input) and ``1`` when it raises
        OverflowError (infinite input).
    """
    import math

    factor = 10 ** decimals

    try:
        return math.ceil(n * factor) / factor
    except ValueError:
        return int(0)
    except OverflowError:
        return int(1)


def round_up_df(df, decimals=0):
    """Round every non-zero cell of ``df`` up, in place, using ``round_up``.

    Cells are addressed by position with ``DataFrame.iat``. The previous
    chained form ``df[i][j] = ...`` assigned through an intermediate Series,
    which pandas does not guarantee to write back to ``df``. The ``decimals``
    argument is now forwarded to ``round_up`` (it used to be ignored and 0 was
    always used).

    Args:
        df: DataFrame of numeric values; modified in place.
        decimals: Number of decimal places passed to ``round_up``.

    Returns:
        The same ``df`` object, after modification.
    """
    row_count, col_count = df.shape

    for col_pos in range(col_count):
        for row_pos in range(row_count):
            value = df.iat[row_pos, col_pos]
            # NaN compares unequal to 0, so it is also routed through round_up.
            if value != 0:
                df.iat[row_pos, col_pos] = round_up(value, decimals)

    return df


def imageio_similarity_score(df):
    """Return the mean of the per-column means of ``df``.

    The caller passes the rounded-up deviation frame (described in the
    notebook as a binary DataFrame).

    Args:
        df: DataFrame of numeric values.

    Returns:
        The mean over columns of each column's mean.
    """
    return df.mean().mean()


def image_fusion_score(df):
    """Reduce a frame to three integers, one per position within column triads.

    Theory 1: Average score per value within triad sets of the RGB image
    (Assumption: Predominant color).

    Column means are grouped into consecutive triads (columns 0-2, 3-5, ...).
    For each of the three positions inside a triad the means are summed over
    all complete triads, divided by the total number of columns and truncated
    with ``int``. Columns after the last complete triad contribute nothing to
    the sums but still count in the divisor.

    Args:
        df: DataFrame whose columns are labelled 0..n-1.

    Returns:
        list[int]: Three truncated values, one per triad position.
    """
    _, n_cols = df.shape

    column_means = [df[label].mean() for label in range(n_cols)]

    # Only complete triads are summed.
    covered = 3 * int(n_cols / 3)

    triad = []
    for position in range(3):
        position_total = sum(column_means[position:covered:3])
        triad.append(int(position_total / n_cols))

    return triad


def SimilarityScore(start,termination):
    """Compute the similarity score and fusion triad for a batch of images.

    For every file in the slice ``[start:termination]`` of the
    PixelDeviationScore directory listing:

    * columns 300-599 of the deviation file are read, relabelled 0-299,
      rounded up with ``round_up_df`` and written to the ImageSimilarity
      directory; their ``imageio_similarity_score`` is recorded;
    * the same-named Cover file is read and its ``image_fusion_score`` triad
      is recorded.

    One CSV row per image (three triad values followed by the similarity
    score) is written to ``batch_<start>-<termination>.csv`` in the
    ImageSimilarityScore directory.

    The batch frame is built directly from the four result lists. The earlier
    chain of index merges renamed the clashing column ``0`` to ``0_x``/``0_y``
    a second time, which newer pandas versions reject with a MergeError; the
    header-less CSV written here has the same content.

    Args:
        start: First index (inclusive) into the deviation-file listing.
        termination: Last index (exclusive) into the deviation-file listing.

    Returns:
        None.
    """
    import pandas as pd

    # Data roots
    root_input = '/kaggle/working/output/scores/PixelDeviationScore/'
    root_input_basefield = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Cover_ImageNormalized/'
    root_output = '/kaggle/working/output/scores/ImageSimilarity/'
    root_output_sc = '/kaggle/working/output/scores/ImageSimilarityScore/'

    # Input files (PixelDeviationScore) restricted to the requested batch
    filenameBaseFieldList = imageio_list_bulk(root_input)[start:termination]

    # NOTE(review): assumes the deviation files hold 300 base-field columns
    # followed by 300 score columns (PixelDeviationScore with joinBaseField=True) - confirm.
    selectedCols = list(range(300, 600))
    newColumnsNames = {col: col - 300 for col in selectedCols}

    ImageFusionScoreList_R = []
    ImageFusionScoreList_G = []
    ImageFusionScoreList_B = []
    ImageioSimilarityScoreList = []

    for file_name in filenameBaseFieldList:

        # ImageSimilarityScore case
        dfImageio = pd.read_csv(root_input + file_name, index_col=None, header=None, usecols=selectedCols)
        dfImageio = dfImageio.rename(columns=newColumnsNames)
        dfImageio = round_up_df(dfImageio, 0)
        dfImageio.to_csv(root_output + file_name, sep=',', index=False, header=False)
        ImageioSimilarityScoreList.append(imageio_similarity_score(dfImageio))

        # ImageFusionScore case
        dfBaseField = pd.read_csv(root_input_basefield + file_name, index_col=None, header=None)
        triad = image_fusion_score(dfBaseField)
        ImageFusionScoreList_R.append(triad[0])
        ImageFusionScoreList_G.append(triad[1])
        ImageFusionScoreList_B.append(triad[2])

    # One row per image: (triad value 1, triad value 2, triad value 3, similarity score)
    df2 = pd.DataFrame({
        0: ImageFusionScoreList_R,
        1: ImageFusionScoreList_G,
        2: ImageFusionScoreList_B,
        3: ImageioSimilarityScoreList,
    })

    batch = 'batch_'+str(start)+'-'+str(termination)+'.csv'
    df2.to_csv(root_output_sc + batch, sep=',', index=False, header=False)

    return


def ImageFusionScoreTest(start,termination):
    """Compute the fusion triad for a batch of normalized Test images.

    For every file in the slice ``[start:termination]`` of the Test directory
    listing, ``image_fusion_score`` is evaluated and the resulting three
    values become one row of ``batch_<start>-<termination>.csv`` in the
    Fusion directory (no header, no index).

    Args:
        start: First index (inclusive) into the Test file listing.
        termination: Last index (exclusive) into the Test file listing.

    Returns:
        None.
    """
    import pandas as pd

    # Data roots
    test_dir = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Test_ImageNormalized/'
    fusion_dir = '/kaggle/working/output/scores/Fusion/'

    batch_files = imageio_list_bulk(test_dir)[start:termination]

    # One triad (three values) per Test image, in listing order.
    triads = [
        image_fusion_score(pd.read_csv(test_dir + file_name, index_col=None, header=None))
        for file_name in batch_files
    ]

    df2 = pd.DataFrame(
        {position: [triad[position] for triad in triads] for position in range(3)}
    )

    batch = 'batch_'+str(start)+'-'+str(termination)+'.csv'
    df2.to_csv(fusion_dir + batch, sep=',', index=False, header=False)

    return


# Re-set the initial sample sizes: filesVerification() compares the number of
# normalized files against these two integers.
print('\n************************************************************************************************************************************')
trainSample = int(input('\nPlease re insert the initial sample-number of instances for training (0-1000) non-test-images:'))
testSample = int(input('\nPlease re insert the initial sample-number of instances for testing (0-5000) test-images:'))
print('\n************************************************************************************************************************************')


def filesVerification():
    """Check the input and output directories before scores are generated.

    Input check: each of the Cover/JMiPOD/JUNIWARD/UERD normalized
    directories must contain exactly ``trainSample`` entries and the Test
    directory exactly ``testSample`` entries (both are module-level integers).

    Output check: the per-image output directories must hold as many entries
    as the ImageSimilarity directory; the per-batch directories must hold as
    many entries as the Fusion directory, unless the Fusion directory holds no
    more than ``testSample`` entries, in which case the check passes.

    A status block is printed for each group; on failure the first offending
    directory is named.

    Returns:
        list[bool]: ``[inputs_ok, outputs_ok]``.
    """
    import os

    ## Input files (image normalized)
    normalized_inputs = [
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Cover_ImageNormalized/',
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JMiPOD_ImageNormalized/',
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JUNIWARD_ImageNormalized/',
        '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/UERD_ImageNormalized/',
    ]

    ## Input files (image normalized) | Test case
    test_input = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Test_ImageNormalized/'

    ## Output roots (one file per image)
    per_image_outputs = [
        '/kaggle/working/output/scores/PixelDeviationScore/',
        '/kaggle/working/output/scores/ImageSimilarity/',
    ]

    ## Output roots | Batch case (one file per batch)
    per_batch_outputs = [
        '/kaggle/working/output/scores/ImageSimilarityScore/',
        '/kaggle/working/output/scores/Fusion/',
    ]

    def entry_count(directory):
        # Number of entries currently in the directory.
        return len(os.listdir(directory))

    def report(flags, locations, failure_text, success_text):
        # Print the status block for one group and return whether every flag is True.
        rule = '\n --------------------------------------------'
        failed_positions = [pos for pos, flag in enumerate(flags) if flag != True]
        print(rule)
        if failed_positions:
            print(failure_text)
            print('\n Please check: ',locations[failed_positions[0]])
        else:
            print(success_text)
        print(rule)
        return not failed_positions

    ## Input-files verification (training classes first, then Test)
    input_flags = [entry_count(directory) == trainSample for directory in normalized_inputs]
    input_flags.append(entry_count(test_input) == testSample)

    ## Output-files verification; the second directory of each group is the reference
    image_reference = entry_count(per_image_outputs[1])
    output_flags = [entry_count(directory) == image_reference for directory in per_image_outputs]

    batch_reference = entry_count(per_batch_outputs[1])
    for directory in per_batch_outputs:
        output_flags.append(
            entry_count(directory) == batch_reference or 0 >= batch_reference - testSample
        )

    ## Results
    return [
        report(
            input_flags,
            normalized_inputs + [test_input],
            '\n Input files does not meet the requirements.',
            '\n Input files status: OK',
        ),
        report(
            output_flags,
            per_image_outputs + per_batch_outputs,
            '\n Output files does not meet the requirements.',
            '\n Output files status: OK',
        ),
    ]

def AUC_filtering():
    """Placeholder for the AUC filtering milestone; currently does nothing.

    Returns:
        None.
    """
    return None


"""
Kaggle Solution scheme
"""

# Milestone 1: P(E) = 1 - weighted similarity score
# Milestone 2: Build Model | Cover + JMiPOD + JUNIWARD + UERD = P(E)
# Milestone 3: Evaluate Test
# Milestone 4: Accomplishment level of AUC constraint (i.e. AUC filtering)
# Kaggle Submission


# Batching testing mode
#
# Each index i in a requested range is processed as the one-image batch
# (i, i+1), so one CSV named batch_<i>-<i+1>.csv is produced per image.

# Batch range | Training dataset (both prompts are shown before conversion)
print('\n Please set a batch scope of 1000 units.')
print('\n Remark 1: Do not insert the same number, please.')
print('\n Remark 2: Images Normalized to be fused from test data set allow batch numbers from 0 to 5001 batches')
start, termination = map(int, (
    input('\n Enter the batch-start number (training dataset): '),
    input('\n Enter the batch-termination number (training dataset): '),
))

# Batch range | Testing dataset
print('\n Please set a batch scope of 1000 units.')
print('\n Remark 1: Do not insert the same number, please.')
print('\n Remark 2: Images Normalized to be fused from test data set allow batch numbers from 0 to 5001 batches')
startT, terminationT = map(int, (
    input('\n Enter the batch-start number (testing dataset): '),
    input('\n Enter the batch-termination number (testing dataset): '),
))

# Keep the Cover values in front of the deviation scores (SimilarityScore reads columns 300-599).
joinBaseField = True

# Verification: a -> input files OK, b -> output files OK
a,b = filesVerification()

if a == True and b == True:

    # Files creation | Training
    for i in range(start,termination):
        PixelDeviationScore(i,i+1,joinBaseField)
        SimilarityScore(i,i+1)

    print('\n Processed Batches (Training Dataset): '+'From '+str(start)+' to '+ str(termination-1))
    print('\n Work done.')
    print('\n --------------------------------------------')
    print('\n Start next time from the following batch number: '+ str(termination))
    print('\n --------------------------------------------')
    print('\n --------------------------------------------')

    # Files creation | Testing
    for i in range(startT,terminationT):
        if i <= 5001:
            ImageFusionScoreTest(i,i+1)
        print('Processed Batch:', str(i), ' of ', str(terminationT-1), 'ones.')

    print('\n Processed Batches (Testing Dataset): '+'From '+str(startT)+' to '+ str(terminationT-1))
    print('\n Work done.')
    print('\n --------------------------------------------')
    print('\n Start next time from the following batch number: '+ str(terminationT))
    print('\n --------------------------------------------')
    print('\n --------------------------------------------')

"""
Bibliography:

    
References:

Amos,D. n.d. How to Round Numbers in Python. Retrieved from
    https://realpython.com/python-rounding/

Solving ValueError: cannot convert float NaN to integer. Retrieved from
    https://stackoverflow.com/questions/59390764/solving-valueerror-cannot-convert-float-nan-to-integer


"""

In [None]:
"""
Outcome Verifier of KaggleSolution2.py
"""
import os

# List the content of every input directory used by KaggleSolution2.py.
print('\n******************')
print('Input files')
print('******************')

for heading, rootToTest in (
    ('\nImage Normalized | Cover',
     '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Cover_ImageNormalized/'),
    ('\nImage Normalized | JMiPOD',
     '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JMiPOD_ImageNormalized/'),
    ('\nImage Normalized | JUNIWARD',
     '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/JUNIWARD_ImageNormalized/'),
    ('\nImage Normalized | UERD',
     '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/UERD_ImageNormalized/'),
    ('\nImage Normalized | Test',
     '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Test_ImageNormalized/'),
):
    print(heading)
    numy = os.listdir(rootToTest)
    print(numy)

# List the content of every output directory written by KaggleSolution2.py.
print('\n******************')
print('Output files')
print('******************')

for heading, rootToTest in (
    ('\nPixel Deviation Score | Matrix 1 (deviation-values matrix): 300x100 | Matrix 2:300x100 (deviation-score-values matrix)',
     '/kaggle/working/output/scores/PixelDeviationScore/'),
    ('\nImage Similarity | Binary Matrix (0:non-altered value | 1:altered value)',
     '/kaggle/working/output/scores/ImageSimilarity/'),
    ('\nImage Similarity Score (RGB triads for non-Test)',
     '/kaggle/working/output/scores/ImageSimilarityScore/'),
    ('\nFusion (RGB triads for Test)',
     '/kaggle/working/output/scores/Fusion/'),
):
    print(heading)
    numy = os.listdir(rootToTest)
    print(numy)
 

In [None]:
"""
Create directories to prototype with input image files of the competition data (prototype outputs | consolidation)
"""

# Function directoryCreation(directoryList)

def directoryCreation(directoryList):
    """Create every directory in ``directoryList`` and report each one.

    Directories that already exist are left untouched. Missing parent
    directories are created as well, so the result no longer depends on the
    list naming parents before their children (``os.mkdir`` failed with
    FileNotFoundError in that case).

    Args:
        directoryList: Iterable of directory paths to create.

    Returns:
        None.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
    """
    import os

    print('\n*******************************************************************************************************************')
    print('Created or existing directories')
    print('*******************************************************************************************************************')

    for directory in directoryList:

        # exist_ok=True makes re-running the cell harmless.
        os.makedirs(directory, exist_ok=True)

        print('\n Created or existing directory: ',directory)

    print('\nDirectories are created and available to upload data...')

    return

## Required directories | Batch 2: Consolidation

# Generic parent directories
path_generic1 = '/kaggle/working/output/'
path_generic3 = '/kaggle/working/output/consolidation/'

# Directories that receive the consolidated datasets and the submission file
path_ImageSimilarityScoreConsolidation = '/kaggle/working/output/consolidation/ImageSimilarityScoreConsolidation/'
path_FusionConsolidation = '/kaggle/working/output/consolidation/FusionConsolidation/'
path_Submission = '/kaggle/working/output/consolidation/Submission/'

directoryListG2 = [
    path_ImageSimilarityScoreConsolidation,
    path_FusionConsolidation,
    path_Submission,
]
# Parents are listed first so that they exist before their children are created.
directoryList2 = [path_generic1, path_generic3] + directoryListG2

print('\n*******************************************************************************************************************')
print('Required directories to create | Batch 2: Consolidation')
print('*******************************************************************************************************************')
print('\nDirectories to create:','\n', directoryList2)

directoryCreation(directoryList2)

# Drop the two generic parents (the first two entries) so only the data directories remain.
del directoryList2[:2]

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Thu May 30 11:49:00 2020

@author: Mauricio Azálgara Bedoya

Kaggle user account: mazalgarab

"""

"""
AUCEvaluation.py
Version adapted from original version
Part 1: Batches' consolidation
"""


"""
Tensorflow Approach | AUC (Area under the curve) via a Riemann sum
"""

def AUCvalue(num_thresholds,thresholds,y_true,y_pred):
    """Compute the ROC AUC of predictions with ``tf.keras.metrics.AUC``.

    The ``num_thresholds`` and ``thresholds`` arguments are now forwarded to
    the metric; they used to be accepted but ignored in favour of the
    hard-coded values ``3`` and ``[0.0, 0.4, 1.0]``. Passing ``None`` for both
    keeps those former values.

    Remark: weights = [2, 1] not included.

    Args:
        num_thresholds: Number of thresholds used to discretize the ROC curve.
            Per the Keras documentation it is ignored when ``thresholds`` is
            given.
        thresholds: Explicit list of thresholds in [0, 1], or ``None``.
        y_true: Ground-truth labels.
        y_pred: Predicted scores.

    Returns:
        The AUC value as returned by ``m.result().numpy()``.
    """
    # tf.keras.metrics.AUC
    import tensorflow as tf

    if num_thresholds is None and thresholds is None:
        # Former hard-coded configuration.
        num_thresholds, thresholds = 3, [0.0, 0.4, 1.0]
    elif num_thresholds is None:
        # Keras default; only a placeholder because explicit thresholds take precedence.
        num_thresholds = 200

    m = tf.keras.metrics.AUC(
        num_thresholds=num_thresholds, curve='ROC', summation_method='interpolation', name=None,
        dtype=None, thresholds=thresholds, multi_label=False, label_weights=None
    )
    _ = m.update_state(y_true, y_pred)

    return m.result().numpy()

def AUCFiltering(rootInput='C:/Users/mazal/Downloads/ALASKA2 Image Steganalysis/DataToPrototype/TestPrototype2/Output/Consolidation/Test_Predictions/',
                 InputFileName='Predictions.csv'):
    """List the prediction models of a predictions file and ask for two of them.

    The predictions CSV is loaded, the columns 'Feature 1', 'Feature 2' and
    'Feature 3' are removed from the list of columns, and the remaining
    columns are shown as an indexed table. The user is then prompted for two
    model numbers, which are printed. No AUC value is computed yet.

    Args:
        rootInput: Directory (ending in a separator) holding the predictions
            file. The default is the author's local Windows path, kept for
            backward compatibility; pass the actual location when running
            elsewhere.
        InputFileName: Name of the predictions CSV inside ``rootInput``.

    Returns:
        None.

    Raises:
        ValueError: If one of the three feature columns is missing
            (raised by ``list.remove``).
    """
    import pandas as pd

    # Load Predictions file as DataFrame
    df = pd.read_csv(rootInput+InputFileName)

    # Every non-feature column is treated as one prediction model
    PredictionModelList = list(df.columns)
    for feature_column in ('Feature 1', 'Feature 2', 'Feature 3'):
        PredictionModelList.remove(feature_column)

    dfModel = pd.DataFrame({
        'Item': list(range(len(PredictionModelList))),
        'Model': PredictionModelList,
    })

    # Select Models ('\P' was an invalid escape sequence; a newline was intended)
    print('\nPrediction Model List:')
    print(dfModel)
    print('\nInsert 2 numbers corresponding to 2 predicion models to be AUC filtered:')
    model1 = input('\nModel 1: ')
    model2 = input('\nModel 2: ')

    # AUC values
    ## AUC values should turn out over 0.85.

    print(model1, model2)
    return None

def batchMerger(start, termination, test):
    """Concatenate the one-image batch files of a range into a single CSV.

    For every ``i`` in ``range(start, termination)`` the file
    ``batch_<i>-<i+1>.csv`` is read from the input directory; all frames are
    concatenated and written as ``batch_<start>-<termination>.csv`` in the
    consolidation directory. The working directory is changed to the
    prototype output directory as a side effect.

    Args:
        start: First batch index (inclusive).
        termination: Last batch index (exclusive).
        test: ``0`` to merge the non-Test scores (ImageSimilarityScore);
            any other value to merge the Test scores (Fusion).

    Returns:
        None.
    """
    import os
    import pandas as pd

    # Set working directory
    os.chdir('/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/')

    # Input/output locations and labels depend only on the dataset selector.
    if test == 0:
        rootInput = '/kaggle/working/output/scores/ImageSimilarityScore/'
        rootOutput = '/kaggle/working/output/consolidation/ImageSimilarityScoreConsolidation/'
        datasetName = 'Image Normalized and Fused from Cover, JMiPOD, UERD, and JUNIWARD'
        T = 'Non-Test'
    else:
        rootInput = '/kaggle/working/output/scores/Fusion/'
        rootOutput = '/kaggle/working/output/consolidation/FusionConsolidation/'
        datasetName = 'Image Normalized and Fused from Test'
        T = 'Test'

    # Consolidation: read the one-image batches in index order.
    dfList = []
    for numy, index in enumerate(range(start, termination), start=1):
        InputFileName = 'batch_'+str(index)+'-'+str(index+1)+'.csv'
        dfList.append(pd.read_csv(rootInput+InputFileName, index_col=None, header=None))
        print('Processed Batches', T,':', numy, ' from ', termination, ' ones.')

    df = pd.concat(dfList)

    # Create consolidated file
    fileName = 'batch_'+str(start)+'-'+str(termination)+'.csv'
    print('\nDataset:',datasetName,'\nNumber of merged instances: ', str(termination-start), '\nInstances available at: ', rootOutput)
    df.to_csv(rootOutput+fileName, sep=',', index=False, header=False)

    return


# Batching: ask for the training and testing ranges, then consolidate each one.
print('\n*******************************************************************')

# Batch range | Training data set
print('\n Please set a batch scope to include as a training data set:')
print('\n Upper limit: 1000')
print('\n Lower limit: 0')

startTrain, terminationTrain = map(int, (
    input('\n Enter the batch-start number: '),
    input('\n Enter the batch-termination number: '),
))

print('\n*******************************************************************')

# Batch range | Testing data set
print('\n Please set a batch scope to include as a testing data set:')
print('\n Upper limit: 5000')
print('\n Lower limit: 0')

startTest, terminationTest = map(int, (
    input('\n Enter the batch-start number: '),
    input('\n Enter the batch-termination number: '),
))

print('\n*******************************************************************')

# Running inputs: test = 0 merges the non-Test scores, test = 1 the Test scores.
start, termination, test = startTrain, terminationTrain, 0
batchMerger(start,termination,test)
print('\n*******************************************************************')
start, termination, test = startTest, terminationTest, 1
batchMerger(start,termination,test)
print('\n*******************************************************************')
"""
Notes:
Other approaches
(1) Best practices for model evaluation and hyperparameter tuning (Raschka & Mirjalili, 2019, pp. 216-218)
(2) Scikit-learn approach (Scikit-learn Developers, 2019, pp. 2013-2015)

Work with the following Tensorflow versions: TensorFlow 2.x

Uninstall tensorflow:
pip uninstall tensorflow
pip uninstall tensorflow-gpu

Install Tensorflow: 
pip install tensorflow
pip install tensorflow-gpu   


"""


"""
Bibliography:
    
Raschka, S., & Mirjalili, V. (2019). Python Machine Learning: Machine Learning
    and Deep Learning with Python, scikit-learn, and TensorFlow 2 (3rd ed.).
    Birmingham, United Kingdom: Packt Publishing Ltd.

Scikit-learn Developers. (2019). Scikit-learn user guide | Release 0.21.3.
    Retrieved from https://scikit-learn.org/stable/_downloads/scikit-learn-docs.pdf

References:

    tf.keras.metrics.AUC. Retrieved from 
    https://www.tensorflow.org/api_docs/python/tf/keras/metrics/AUC

Stackoverflow:
    
    Error on tensorflow cannot import name 'export_saved_model'. Retrieved from
    https://stackoverflow.com/questions/61833301/error-on-tensorflow-cannot-import-name-export-saved-model

    Uninstall tensorflow 2.1.0. Retrieved from
    https://stackoverflow.com/questions/59824224/uninstall-tensorflow-2-1-0

"""

In [None]:
"""
Outcome Verifier of AUCEvaluation.py
"""
import os

# List the consolidated datasets produced by AUCEvaluation.py.
print('\n******************')
print('Outcome 1 of AUCEvaluation.py')
print('Datasets to build a prediction model')
print('******************')

for heading, rootToTest in (
    ('\nTraining dataset', '/kaggle/working/output/consolidation/ImageSimilarityScoreConsolidation/'),
    ('\nTesting dataset', '/kaggle/working/output/consolidation/FusionConsolidation/'),
):
    print(heading)
    numy = os.listdir(rootToTest)
    print(numy)

In [None]:
"""
Prediction Model
"""

"""
PredictionModels.py
version adapted from original version
"""

# -*- coding: utf-8 -*-
"""
Created on Tue Jul  7 14:09:39 2020

@author: Mauricio Azálgara Bedoya

Kaggle user account: mazalgarab
"""

"""
Random Forest Classification Using scikit-learn
"""

"""
Re set initial sample size
"""
# Re-enter the sample sizes; they select which consolidated batch files are loaded below.
print('\n************************************************************************************************************************************')
trainSample = input('\nPlease re insert the initial sample-number of instances for training (0-1000) non-test-images:')
trainSample = int(trainSample)
testSample = input('\nPlease re insert the initial sample-number of instances for testing (0-5000) test-images:')
testSample = int(testSample)
print('\n************************************************************************************************************************************')

# Set working directory
import os
path = '/kaggle/working/output/consolidation/Submission/'
os.chdir(path)

# Load training dataset
# NOTE: from here on trainSample/testSample hold DataFrames, not the integers entered above.
import pandas as pd
import numpy as np
fileNameTrainSample = '/kaggle/working/output/consolidation/ImageSimilarityScoreConsolidation/' + 'batch_0-' + str(trainSample) + '.csv'
trainSample = pd.read_csv(fileNameTrainSample,index_col=None, header=None)
trainSampleFeatures = trainSample.loc[:,[0,1,2]]   # fusion triad
trainSampleLabels = trainSample.loc[:,[3]]         # image similarity score

# Load testing dataset
fileNameTestSample = '/kaggle/working/output/consolidation/FusionConsolidation/' + 'batch_0-' + str(testSample) + '.csv'
testSample = pd.read_csv(fileNameTestSample,index_col=None, header=None)

# Set features and label

## Training dataset
features = np.array(trainSampleFeatures)
label = np.array(trainSampleLabels)

## Testing dataset
featuresTest = np.array(testSample)

# Train-test splitting
# Source: (Nagy, 2018, pp. 191 - 192)

from sklearn import model_selection

# Fixed seed so that the split, the model and therefore the submission are reproducible.
RANDOM_SEED = 42

features_train, features_test, label_train, label_test = model_selection.train_test_split(
    features, label, test_size=0.1, random_state=RANDOM_SEED
)


## Preprocessing task for label_train
### Require to check with Orange prototype
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
# LabelEncoder expects a 1-D array; the label was loaded as a single-column 2-D array.
label_train = le.fit_transform(label_train.ravel())


# Classifier
# Source: (Nagy, 2018, pp. 191 - 192)

from sklearn.ensemble import RandomForestClassifier

random_forest_classifier = RandomForestClassifier(n_estimators=10, max_depth=5, random_state=RANDOM_SEED)
random_forest_classifier.fit(features_train, label_train)

## Prediction on the hold-out part of the training dataset
## NOTE(review): these predictions are kept under their own name but are not scored yet.
labels_predicted_holdout = random_forest_classifier.predict(features_test)

## Prediction on the testing dataset (this is what gets submitted)
labels_predicted = random_forest_classifier.predict(featuresTest)

# Kaggle Submission

## List of image ids
## os.listdir returns entries in arbitrary order, so the names are sorted.
## NOTE(review): the rows of the fusion file are assumed to follow the same sorted
## file-name order - confirm against the code that generated the Fusion batches.
root = '/kaggle/working/output/prototypeOutput-alaska2imagesteganalysis/Test_ImageNormalized/'
imageList = sorted(os.listdir(root))

# The id is the first four characters of the normalized file name plus '.jpg'.
imageListSubmission = [str(name[0:4]) + '.jpg' for name in imageList]

## File generation
listSubmission = list(labels_predicted)

if len(imageListSubmission) != len(listSubmission):
    raise ValueError(
        'Number of test images (' + str(len(imageListSubmission))
        + ') does not match the number of predictions (' + str(len(listSubmission)) + ').'
    )

data = {'Id':imageListSubmission, 'Label':listSubmission}
df = pd.DataFrame(data)

rootOutput1 = '/kaggle/working/output/consolidation/Submission/'
rootOutput2 = '/kaggle/working/'
fileName = 'submission.csv'
df.to_csv(rootOutput1+fileName,sep=',',index=False)
df.to_csv(rootOutput2+fileName,sep=',',index=False)

print('Kaggle Submission')
print('************************************************************************************************************************************')
print(df)

"""
Bibliography:

Nagy, Z. (2018). Artificial Intelligence and Machine Learning Fundamentals: Develop
    real-world applications powered by the latest AI advances (1st ed.). Birmingham,
    United Kingdom: Packt Publishing Ltd.

"""