In [1]:
import numpy as np

# Debug library, very useful
from icecream import ic

In [2]:
import os

# Root folders of the two image classes (relative to the notebook's working dir).
dir_fake = 'dataset/fake'
dir_real = 'dataset/real'


def _collect_file_paths(root):
    """Return the path of every file found under ``root``, recursively.

    Paths are built by joining each directory visited by ``os.walk`` with the
    file names it contains, in the order ``os.walk`` yields them. A missing
    ``root`` produces an empty list because ``os.walk`` yields nothing for it.
    """
    paths = []
    for subdir, _subdirs, files in os.walk(root):
        for file in files:
            paths.append(os.path.join(subdir, file))
    return paths


# Collect all file paths, one flat list per class
fake_files = _collect_file_paths(dir_fake)
real_files = _collect_file_paths(dir_real)

In [3]:
import ImageForensics as ifo

# Value handed to FeatureExtraction as `features=`.
# NOTE(review): presumably the number of features produced per image; confirm
# against the ImageForensics implementation.
N = 300
extract = ifo.FeatureExtraction(features=N)

# Run the extractor over each class (fake first, then real) and keep only the
# entries that are not None, i.e. drop files that failed to process.
psd1D_total_fake = [
    psd for psd in extract.fft_modified(fake_files) if psd is not None
]
psd1D_total_real = [
    psd for psd in extract.fft_modified(real_files) if psd is not None
]

In [None]:
# Number of successfully extracted samples in each class.
n_fake = len(psd1D_total_fake)
n_real = len(psd1D_total_real)

# Class labels: fake -> 0.0, real -> 1.0 (one label per extracted sample).
label_total_fake = np.zeros(n_fake)
label_total_real = np.ones(n_real)

# Stack fake samples first, then real ones, so features and labels stay aligned.
psd1D_total_final = np.concatenate([psd1D_total_fake, psd1D_total_real], axis=0)
label_total_final = np.concatenate([label_total_fake, label_total_real], axis=0)

In [None]:
# Short aliases for the combined feature matrix and its labels (same objects, no copy).
features, labels = psd1D_total_final, label_total_final

In [None]:
# Expected first-digit distribution under Benford's Law:
#   P(d) = log10(1 + 1/d)  for d = 1..9
digits = np.array(range(1, 10))
benford = np.log10(1 + np.true_divide(1, digits))

In [None]:
def _leading_digit(value):
    """Return the first significant (non-zero) decimal digit of ``value``.

    The digit is read from the number's text form, so ``123.4`` -> 1,
    ``0.0034`` -> 3, ``-7.5`` -> 7 and ``1e-05`` -> 1. Values with no
    significant digit (``0``, ``nan``, ``inf``) return 0, which lies outside
    the 1..9 histogram bins and is therefore not counted.

    Taking only ``str(value)[0]`` would return 0 for every magnitude below 1
    and raise ``ValueError`` on a leading ``-``; scanning avoids both.
    """
    for char in str(value):
        if char in "123456789":
            return int(char)
        if char in "eE":
            # Reached the exponent without a non-zero mantissa digit; the
            # exponent's own digits must not be mistaken for the leading digit.
            break
    return 0


# Get the first significant digit of each value.
# NOTE: `features` is rebound from the raw values to lists of digits, as before.
features = [[_leading_digit(value) for value in array] for array in features]

# Count the occurrences of each first digit (bins cover the digits 1..9)
first_digits_counts = [
    np.histogram(array, bins=np.arange(1, 11))[0] for array in features
]

In [None]:
import scipy.stats as stats

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Pearson correlation between each sample's first-digit counts and Benford's
# expected frequencies. Each entry unpacks as (correlation, p_value), in that
# order.
goodness_of_fit = [
    stats.pearsonr(first_digits_count, benford)
    for first_digits_count in first_digits_counts
]

# Significance level for the correlation test.
ALPHA = 0.01

# Pair each prediction with its ground-truth label.
# A sample is predicted legitimate (real) when its digit counts are positively
# and significantly (p < ALPHA) correlated with Benford's distribution.
# A nan result (e.g. constant counts) compares False and is predicted fake.
# NOTE(review): this decision rule is the reviewer's reading of the intent;
# confirm the threshold against the experiment's design.
results = [
    (bool(p_value < ALPHA and correlation > 0), labels[i])
    for i, (correlation, p_value) in enumerate(goodness_of_fit)
]

# calculate True Positive, False Positive, True Negative, False Negative
# fake is 0, real is 1
TP = sum(1 for is_legitimate, label in results if is_legitimate and label == 1)
FP = sum(1 for is_legitimate, label in results if is_legitimate and label == 0)
TN = sum(1 for is_legitimate, label in results if not is_legitimate and label == 0)
FN = sum(1 for is_legitimate, label in results if not is_legitimate and label == 1)

ic(TP, FP, TN, FN)

# Guard every ratio: an empty dataset or a classifier that never predicts
# "real" would otherwise divide by zero.
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)
f1 = _safe_ratio(2 * (precision * recall), precision + recall)
accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)

ic(precision, recall, f1, accuracy)