In [38]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from statistics import mean

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Location Change Score

In [34]:
def getLocationChange(filepath, original_path="results/original_features.csv"):
    """Return, per feature, how close the altered values stay to the baseline.

    For each row and feature the score is
    ``100 - |altered - original| / altered * 100``; the per-feature result is
    the mean of that score over all rows, so 100 means "unchanged".

    Parameters
    ----------
    filepath : str
        CSV holding the altered features; its first column is used as the
        row index.
    original_path : str, optional
        CSV holding the baseline features the altered ones are compared
        against. Defaults to the path that was previously hard-coded here.

    Returns
    -------
    pandas.Series
        One score per feature column, indexed by column name.

    Raises
    ------
    KeyError
        If a row label of the baseline file is missing from ``filepath``.

    Notes
    -----
    The first column after the index is dropped by position in both files
    (presumably the ``pneumonia`` label -- confirm if the CSV layout changes).
    The change is normalised by the *altered* value, so an altered value of 0
    yields a non-finite score for that row.
    """
    original_features = pd.read_csv(original_path, index_col=0).iloc[:, 1:]
    altered_features = pd.read_csv(filepath, index_col=0)
    # Restrict/reorder the altered rows to the baseline's row labels before
    # dropping the leading column, so both frames line up label-for-label.
    altered_features = altered_features.loc[original_features.index, :].iloc[:, 1:]
    relative_change = (altered_features - original_features).abs() / altered_features * 100
    return (100 - relative_change).mean()

## KumarSS

In [40]:
# Round each per-feature location-change score to 2 decimals, then print the mean over features.
print(mean(round(getLocationChange("results/kumar_features.csv"),2)))

49.791538461538465


## NaseemSS

In [41]:
# Per-feature location-change scores for the Naseem feature file, rounded to 2 decimals and averaged.
print(mean(round(getLocationChange("results/naseem_features.csv"),2)))

100.0


## C4S

In [42]:
# Per-feature location-change scores for the C4S feature file, rounded to 2 decimals and averaged.
print(mean(round(getLocationChange("results/c4s_features.csv"), 2)))

81.53923076923077


# Dispersion Score

In [15]:
# Load the baseline feature table; a later cell reuses `original_features`.
original_features = pd.read_csv("results/original_features.csv", index_col=0)
# Smallest `contrasts_1` value among the rows labelled pneumonia == 0.
original_features.groupby("pneumonia")["contrasts_1"].min().loc[0]

11.5595405334875

In [10]:
# Per-class maximum of every feature column in the baseline table
# (relies on `original_features` having been loaded by another cell).
original_features.groupby(by = ["pneumonia"]).max()

Unnamed: 0_level_0,contrasts_1,contrasts_2,contrasts_3,contrasts_4,homogeneitys_1,homogeneitys_2,homogeneitys_3,homogeneitys_4,energys_1,energys_2,energys_3,energys_4,etps
pneumonia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,87.076024,126.810247,89.791237,125.76527,0.532657,0.454599,0.528822,0.452506,0.002991,0.002244,0.002942,0.002246,7.89142
1,190.266993,210.02167,62.380062,207.443287,0.631807,0.537087,0.633715,0.540251,0.009353,0.009143,0.009495,0.009171,7.57102


In [29]:
def getDispersionScore(filepath, original_path="results/original_features.csv"):
    """Return, per feature, the percentage of rows lying inside the baseline range.

    For every feature column, each row of ``filepath`` is checked against the
    [min, max] interval that the same feature spans in the baseline file for
    that row's class. Rows labelled ``pneumonia == 0`` use the class-0
    interval; every other row uses the class-1 interval.

    Parameters
    ----------
    filepath : str
        CSV with the features to score; its first column is used as the row
        index and it must contain a ``pneumonia`` column.
    original_path : str, optional
        CSV with the baseline features that define the per-class ranges.
        Defaults to the path that was previously hard-coded here.

    Returns
    -------
    list of float
        One percentage (0-100) per feature column, in column order. The
        feature columns are all columns after the first one (assumed to be
        the ``pneumonia`` label -- confirm if the CSV layout changes).

    Raises
    ------
    KeyError
        If the baseline file lacks class 0 or class 1, or a feature column.
    """
    features = pd.read_csv(filepath, index_col=0)
    feature_names = features.columns[1:]
    original_features = pd.read_csv(original_path, index_col=0)
    grouped = original_features.groupby(by=["pneumonia"])
    features_min = grouped.min()
    features_max = grouped.max()

    # Normalise by the real number of rows; a fixed 1000 is only correct for
    # files that happen to contain exactly 1000 rows.
    n_rows = len(features)
    if n_rows == 0:
        return [0.0 for _ in feature_names]

    is_class0 = (features["pneumonia"] == 0).to_numpy()
    dispersionScore = []
    for feature_name in feature_names:
        values = features[feature_name].to_numpy()
        # Pick each row's bounds from its own class's baseline range.
        lower = np.where(is_class0,
                         features_min.loc[0, feature_name],
                         features_min.loc[1, feature_name])
        upper = np.where(is_class0,
                         features_max.loc[0, feature_name],
                         features_max.loc[1, feature_name])
        within = int(((values >= lower) & (values <= upper)).sum())
        dispersionScore.append(within * 100 / n_rows)
    return dispersionScore

## KumarSS

In [43]:
# Average of the per-feature dispersion scores for the KumarSS feature file.
mean(getDispersionScore("results/kumar_features.csv"))

87.79230769230769

## NaseemSS

In [44]:
# Average of the per-feature dispersion scores for the NaseemSS feature file.
mean(getDispersionScore("results/naseem_features.csv"))

100.0

## C4S

In [45]:
# Average of the per-feature dispersion scores for the C4S feature file.
mean(getDispersionScore("results/c4s_features.csv"))

99.51538461538462

# SVM Classification

In [113]:
def fitSVM(filepath, original_path="results/original_features.csv",
           test_size=0.3, random_state=42):
    """Fit a polynomial-kernel SVM on one feature file and score it on baseline features.

    The rows of ``filepath`` are split into train and test parts. The model is
    trained on the train part of ``filepath``, but predictions are made on the
    *baseline* features (``original_path``) of the held-out rows, looked up by
    row label. Accuracy, specificity, precision and recall are printed as
    percentages rounded to 2 decimals.

    Parameters
    ----------
    filepath : str
        CSV with the training features; its first column is used as the row
        index and it must contain a ``pneumonia`` label column. All columns
        after the first one are used as predictors (assumes ``pneumonia`` is
        the first column -- confirm if the CSV layout changes).
    original_path : str, optional
        CSV with the baseline features used for evaluation. Defaults to the
        path that was previously hard-coded here.
    test_size : float, optional
        Passed to ``train_test_split``; defaults to the previous fixed 0.3.
    random_state : int, optional
        Passed to ``train_test_split``; defaults to the previous fixed 42.

    Returns
    -------
    None
        Results are only printed.
    """
    features = pd.read_csv(filepath, index_col=0)
    features['pneumonia'] = features['pneumonia'].astype('category')
    X_train, X_test, y_train, y_test = train_test_split(
        features.iloc[:, 1:], features.pneumonia,
        test_size=test_size, random_state=random_state)

    # The split is only used to choose WHICH rows are held out; the values
    # that get scored come from the baseline file, not from `filepath`.
    original_features = pd.read_csv(original_path, index_col=0)
    X_test_original = original_features.iloc[:, 1:].loc[X_test.index]

    clf = svm.SVC(kernel='poly')
    y_pred = clf.fit(X_train, y_train).predict(X_test_original)

    print("Accuracy = ", round(accuracy_score(y_test, y_pred)*100,2))
    # Specificity is the recall of the negative class (label 0).
    print("Specificity = ", round(recall_score(y_test, y_pred, pos_label=0)*100,2))
    print("Precision = ", round(precision_score(y_test, y_pred)*100,2))
    print("Recall = ", round(recall_score(y_test, y_pred)*100,2))

## Original Cover Image

In [114]:
# Baseline run: fit on the original feature file and evaluate on its own held-out rows.
fitSVM("results/original_features.csv")

Accuracy =  99.0
Specificity =  98.67
Precision =  98.68
Recall =  99.33


## Stego Image with KumarSS

In [115]:
# Fit on the KumarSS feature file; fitSVM scores the model on the original features of the held-out rows.
fitSVM("results/kumar_features.csv")

Accuracy =  93.67
Specificity =  96.67
Precision =  96.45
Recall =  90.67


## Stego Image with NaseemSS

In [116]:
# Fit on the NaseemSS feature file; fitSVM scores the model on the original features of the held-out rows.
fitSVM("results/naseem_features.csv")

Accuracy =  99.0
Specificity =  98.67
Precision =  98.68
Recall =  99.33


## Stego Image with C4S

In [117]:
# Fit on the C4S feature file; fitSVM scores the model on the original features of the held-out rows.
fitSVM("results/c4s_features.csv")

Accuracy =  66.33
Specificity =  32.67
Precision =  59.76
Recall =  100.0
