In [None]:
import numpy as np
import imageio.v2 as imageio
import scipy.stats as stats
import csv
import os
from tqdm import tqdm
from google.colab import drive


# Mount Google Drive into the Colab filesystem so the Drive paths below resolve.
drive.mount('/content/drive')


# Directory that is scanned for .pgm images by the extraction loop.
dataset_path = '/content/drive/MyDrive/BossbaseDataset/'
# CSV file that receives one feature row per (image, payload) pair.
output_csv = '/content/drive/MyDrive/steg_features_full.csv'

# Embed a pseudo-random payload by LSB replacement

def embed_lsb(cover_img, payload=0.1, seed=42):
    """Return a stego copy of ``cover_img`` with random bits written to LSBs.

    ``int(h * w * payload)`` distinct pixel positions are drawn at random and
    the least-significant bit of each selected pixel is replaced by a random
    bit. The input array is never modified.

    Parameters
    ----------
    cover_img : array-like, 2-D
        Grayscale cover image. It is cast to ``uint8`` before embedding.
    payload : float, default 0.1
        Fraction of pixels that carry one embedded bit; must lie in [0, 1].
    seed : int, default 42
        Seed for the private random generator. The same seed always selects
        the same positions and bits for a given image size and payload.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(h, w)`` containing the stego image.

    Raises
    ------
    ValueError
        If ``payload`` is outside [0, 1] or ``cover_img`` is not 2-D.
    """
    if not 0 <= payload <= 1:
        raise ValueError(f"payload must be within [0, 1], got {payload!r}")

    # A private legacy generator yields exactly the same stream as
    # np.random.seed(seed) followed by np.random.* calls, but leaves the
    # process-wide NumPy random state untouched.
    rng = np.random.RandomState(seed)

    img = np.asarray(cover_img).astype(np.uint8)  # astype() returns a copy
    h, w = img.shape
    num_pixels = h * w
    num_bits = int(num_pixels * payload)

    # Draw order (bits first, then positions) is kept so results are stable.
    bits = rng.randint(0, 2, num_bits, dtype=np.uint8)
    flat_img = img.flatten()
    indices = rng.choice(num_pixels, num_bits, replace=False)

    # Clear the LSB (mask 0b11111110) and OR in the payload bit.
    flat_img[indices] = (flat_img[indices] & 254) | bits

    return flat_img.reshape(h, w)

# Apply the fixed chain of geometric transformations
def apply_all_transformations(img):
    """Return a transposed copy of ``img``.

    This function used to apply, in sequence: rot90(k=0), rot90(k=-1),
    rot90(k=2), rot90(k=1), fliplr, flipud and finally a transpose. The
    rotations sum to 180 degrees and fliplr followed by flipud is another
    180 degrees, so everything before the transpose cancels out. The only
    net effect is the transpose, which is what is computed here directly.

    NOTE(review): if the intent was to produce several augmented variants
    (one per transformation), neither the old chain nor this function does
    that -- confirm the intended behaviour with the author.

    Parameters
    ----------
    img : array-like with at least 2 dimensions
        Input image. It is copied, so the caller's array is never modified.

    Returns
    -------
    numpy.ndarray
        Transposed view of a fresh copy of ``img``.

    Raises
    ------
    ValueError
        If ``img`` has fewer than 2 dimensions (np.rot90 rejected these too).
    """
    transformed = np.array(img, copy=True, subok=True)
    if transformed.ndim < 2:
        raise ValueError("img must have at least 2 dimensions")
    return transformed.T

# Extract statistical features from an image

def extract_histogram_features(img):
    """Compute eight scalar statistics over the flattened pixel values.

    Parameters
    ----------
    img : array-like
        Image whose pixels are flattened in row-major order.

    Returns
    -------
    list
        ``[kurtosis, skewness, std, range, median, geometric mean of
        (pixel + 1), Hjorth mobility, Hjorth complexity]`` in that order.
        Mobility and complexity are reported as 0 when the variance they
        would divide by is zero (e.g. for a constant image).
    """
    raw = np.asarray(img).flatten()

    # All arithmetic is done in float64. On the original unsigned 8-bit
    # pixels, np.diff wraps modulo 256 (5 - 10 -> 251) and `pixel + 1`
    # overflows 255 to 0, which forces the geometric mean to 0.
    flat = raw.astype(np.float64)

    kurt = stats.kurtosis(flat)
    skew = stats.skew(flat)
    std = np.std(flat)
    # Taken on the raw pixels so the value keeps the input's integer dtype.
    ran = np.ptp(raw)
    med = np.median(flat)
    # Shift by one so zero-valued pixels do not force the result to zero.
    geom_med = stats.gmean(flat + 1)

    # Hjorth parameters from the first and second differences of the signal.
    diff1 = np.diff(flat)
    diff2 = np.diff(diff1)
    var0 = np.var(flat)
    var1 = np.var(diff1)
    var2 = np.var(diff2)
    mobility = np.sqrt(var1 / var0) if var0 != 0 else 0
    # var1 != 0 implies a non-constant signal, hence mobility > 0 here.
    complexity = np.sqrt(var2 / var1) / mobility if var1 != 0 else 0

    return [kurt, skew, std, ran, med, geom_med, mobility, complexity]


# Payload rates passed to embed_lsb for every image.
payloads = [0.1, 0.3, 0.5]

# Column names of the output CSV; the order matches each row written below.
header = [
    'Image', 'Payload',
    'Kurtosis', 'Skewness', 'StdDev', 'Range', 'Median', 'GeomMedian',
    'HjorthMobility', 'HjorthComplexity',
]

with open(output_csv, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    # Sorted listing of the .pgm files (case-insensitive extension match).
    all_images = sorted(
        entry for entry in os.listdir(dataset_path)
        if entry.lower().endswith('.pgm')
    )

    for img_name in tqdm(all_images, desc='Processing images'):
        img_path = os.path.join(dataset_path, img_name)
        img = imageio.imread(img_path)

        # Collapse multi-channel images to one channel by averaging.
        if img.ndim == 3:
            img = np.mean(img, axis=2).astype(np.uint8)

        # Keep at most the top-left 512x512 region.
        img = img[:512, :512]
        transformed_img = apply_all_transformations(img)

        # One CSV row per (image, payload) pair.
        for p in payloads:
            stego = embed_lsb(transformed_img, payload=p)
            feats = extract_histogram_features(stego)
            writer.writerow([img_name, p, *feats])

print(f"All features extracted and saved to {output_csv}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Processing images: 100%|██████████| 10001/10001 [24:47<00:00,  6.72it/s]

All features extracted and saved to /content/drive/MyDrive/steg_features_full.csv





In [None]:
import pandas as pd
import re

# This cell re-sorts the feature CSV so that rows are ordered by the number
# embedded in each image name rather than by the name as a string.
# Input: the feature CSV; output: a separate, sorted copy.
csv_path = '/content/drive/MyDrive/steg_features_full.csv'
sorted_csv_path = '/content/drive/MyDrive/steg_features_full_sorted.csv'

df = pd.read_csv(csv_path)


def extract_num(name):
    """Return the first run of digits in ``name`` as an int, or 0 if none.

    ``name`` is converted with ``str()`` first, so non-string values are
    accepted.
    """
    digits = re.search(r'\d+', str(name))
    if digits is None:
        return 0
    return int(digits.group())


# Order rows by the number in the image name, then by payload. The numeric
# key lives in a temporary 'ImageNumber' column that is dropped again before
# saving, so the output keeps the same columns as the input.
df = (
    df.assign(ImageNumber=df['Image'].apply(extract_num))
      .sort_values(by=['ImageNumber', 'Payload'], ascending=[True, True])
      .drop(columns=['ImageNumber'])
)

# A stray bare `n` used to sit here; it raised NameError on a fresh kernel
# and has been removed.
df.to_csv(sorted_csv_path, index=False)

print(f"Sorted CSV saved to: {sorted_csv_path}")


✅ Sorted CSV saved to: /content/drive/MyDrive/steg_features_full_sorted.csv
