In [70]:
import io
import os
from enum import Enum

import cv2
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from PIL import Image
from scipy.signal import find_peaks
from scipy.signal import savgol_filter
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB


In [84]:
def zero_pad(Y):
    """Zero-pad a 2-D array so both dimensions become multiples of 8.

    The input is copied into the top-left corner of a new ``uint8`` array;
    the extra rows/columns on the bottom and right are filled with zeros.

    Parameters
    ----------
    Y : 2-D array
        Array to pad. Values are cast to ``uint8`` on assignment.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array whose shape is ``Y.shape`` rounded up to the next
        multiple of 8 along each axis.
    """
    rows, cols = Y.shape
    # Ceiling division via negated floor division: -(-n // 8) == ceil(n / 8).
    target_shape = tuple(-(-extent // 8) * 8 for extent in (rows, cols))
    padded = np.zeros(target_shape, dtype=np.uint8)
    padded[:rows, :cols] = Y
    return padded

def DCT(img):
    """Compute the DCT of every 8x8 tile of a 2-D array.

    Tiles are visited row by row (top to bottom, left to right within a row)
    and each tile is transformed with ``cv2.dct`` after conversion to
    ``float32``.

    Parameters
    ----------
    img : 2-D numpy.ndarray
        Input plane. The output buffer is sized for ``(rows // 8) * (cols // 8)``
        tiles, so both dimensions are expected to be multiples of 8
        (see ``zero_pad``).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(8, 8, num_tiles)``; ``[:, :, k]`` holds the
        coefficients of the k-th tile, so ``[0, 0, :]`` is the DC term of
        every tile.
    """
    rows, cols = img.shape
    tile_count = (rows // 8) * (cols // 8)
    coeffs = np.zeros((8, 8, tile_count), dtype=float)

    tile_index = 0
    for top in range(0, rows, 8):
        for left in range(0, cols, 8):
            # cv2.dct needs a floating-point input
            tile = img[top:top + 8, left:left + 8].astype(np.float32)
            coeffs[:, :, tile_index] = cv2.dct(tile)
            tile_index += 1

    return coeffs


def hist(DC, N=2000, show=False):
    """Build a density-normalised histogram of ``DC`` and optionally plot it.

    Parameters
    ----------
    DC : array-like
        Values to histogram (the callers in this notebook pass DC coefficients).
    N : int, default 2000
        Number of equal-width bins.
    show : bool, default False
        When True, also draw the histogram with seaborn/matplotlib.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The per-bin densities (length ``N``) and the bin edges (length ``N + 1``),
        exactly as returned by ``numpy.histogram(..., density=True)``.
    """
    density, edges = np.histogram(DC, bins=N, density=True)
    if not show:
        return density, edges

    plt.figure(figsize=(10, 6))
    # Re-plot the pre-binned data: left bin edges as positions, densities as weights.
    sns.histplot(x=edges[:-1], weights=density, bins=N, kde=True, stat="density")
    plt.title("DC Coefficient Histogram")
    plt.xlabel("DC Coefficient Value")
    plt.ylabel("Density")
    plt.show()
    return density, edges

def FFT(hcount):
    """Return the centred magnitude spectrum of a 1-D sequence.

    The mean is subtracted first (removing the DC component of the spectrum),
    then the FFT is taken, shifted so the zero-frequency bin sits in the
    middle, and its magnitude returned.

    Parameters
    ----------
    hcount : array-like
        Input samples (the callers in this notebook pass histogram densities).
        The argument is NOT modified.

    Returns
    -------
    numpy.ndarray
        Real array of the same length as ``hcount`` holding
        ``|fftshift(fft(hcount - mean(hcount)))|``.
    """
    # Work on a private float copy: the previous in-place ``hcount -= mean``
    # silently altered the caller's array and failed on integer inputs.
    samples = np.array(hcount, dtype=float)
    samples -= samples.mean()
    spectrum = np.fft.fftshift(np.fft.fft(samples))
    return np.abs(spectrum)


class QualityLevel(Enum):
    """JPEG quality settings used by ``compress`` (passed via ``.value``).

    NOTE: the three random members are drawn once, when this class body is
    executed, so every image processed afterwards shares the same HIGH /
    MEDIUM / WORST setting until the cell is re-run. The draws use NumPy's
    global RNG, so they are not reproducible unless it has been seeded.
    """

    # Fixed setting.
    ORIGINAL = 100
    # Random settings; ``high`` is exclusive, so the three ranges never overlap.
    HIGH = np.random.randint(low=65, high=90)
    MEDIUM = np.random.randint(low=35, high=65)
    WORST = np.random.randint(low=10, high=35)

def compress(path, quality: QualityLevel = QualityLevel.ORIGINAL, overwrite: bool = False):
    """Return the image at ``path`` after a JPEG encode/decode round trip.

    The image is converted to RGB, encoded as JPEG at ``quality.value`` into
    an in-memory buffer, and decoded again. Compared with the previous
    implementation this:

    * no longer replaces the source file by default, so repeated calls (and
      notebook re-runs) always start from the same on-disk image instead of
      an already-degraded one;
    * no longer leaves a ``temp_jpeg.jpg`` file in the working directory;
    * closes every file handle it opens.

    Parameters
    ----------
    path : str or path-like
        Location of the source image.
    quality : QualityLevel
        Member whose ``.value`` is used as the JPEG ``quality`` setting.
    overwrite : bool, default False
        When True, additionally save the round-tripped image back to
        ``path`` (the previous, destructive behaviour).

    Returns
    -------
    PIL.Image.Image
        The decoded RGB image, fully loaded in memory.
    """
    buffer = io.BytesIO()
    with Image.open(path) as source:
        source.convert('RGB').save(buffer, format='JPEG', quality=quality.value)

    buffer.seek(0)
    with Image.open(buffer) as decoded:
        # convert() forces the lazy decoder to run and returns an image that
        # no longer depends on the buffer.
        result = decoded.convert('RGB')

    if overwrite:
        result.save(path)
    return result

def quality(x, N=2000, height_offset=0.05, min_distance=15):
    """Estimate the dominant spacing between peaks of ``x``.

    Peaks are located with ``scipy.signal.find_peaks`` using a height
    threshold of ``mean(x) + height_offset`` and a minimum separation of
    ``min_distance`` samples. The most frequent gap between consecutive
    peaks is reported.

    Parameters
    ----------
    x : 1-D array-like
        Signal to analyse (the callers in this notebook pass the output of ``FFT``).
    N : int, default 2000
        Unused; kept so existing calls remain valid.
    height_offset : float, default 0.05
        Amount added to the mean of ``x`` to form the peak-height threshold.
    min_distance : int, default 15
        Minimum number of samples between neighbouring peaks.

    Returns
    -------
    (bool, int)
        ``(True, 0)`` when at most one peak is found; otherwise
        ``(False, d)`` where ``d`` is the most common distance between
        consecutive peaks (smallest such distance on ties), as a plain ``int``.
    """
    threshold = np.mean(x) + height_offset
    peaks, _ = find_peaks(x, height=threshold, distance=min_distance)

    # Fewer than two peaks means there is no spacing to measure.
    if len(peaks) <= 1:
        return True, 0

    gaps = np.diff(peaks)
    # bincount + argmax gives the mode of the (non-negative, integer) gaps.
    return False, int(np.bincount(gaps).argmax())


In [87]:
# Generating training data
DATASET_DIR = "DIV2K_train_LR_wild"
VERSIONS_PER_IMAGE = 4  # each image has 4 versions (file suffixes w1..w4)

image_number = 50  # number of source images processed (IDs 0001..0050)


def _dc_peak_distance(path, level):
    """Run the full feature pipeline on one image file.

    The image is passed through ``compress`` at ``level``, converted to
    YCbCr, and its first channel is zero-padded to a multiple of 8. The DC
    coefficient of every 8x8 DCT tile is histogrammed, the histogram is
    transformed with ``FFT``, and ``quality`` reports the dominant peak
    spacing of that spectrum, which is returned.
    """
    ycbcr = compress(path, level).convert('YCbCr')
    luma = zero_pad(np.array(ycbcr)[:, :, 0]).astype(np.float64)
    dc_terms = DCT(luma)[0, 0, :]
    hcount, _ = hist(dc_terms)
    _, peak_dist = quality(FFT(hcount))
    return peak_dist


def _collect_peak_distances(level, n_images):
    """Return the peak-distance feature for every version of the first ``n_images`` images.

    Files are visited in the order image 1 (versions 1..4), image 2, ...,
    so the result has ``n_images * VERSIONS_PER_IMAGE`` entries.
    """
    features = []
    for image_id in range(1, n_images + 1):
        for version in range(1, VERSIONS_PER_IMAGE + 1):
            file_name = f"{image_id:04}x4w{version}.png"
            features.append(_dc_peak_distance(os.path.join(DATASET_DIR, file_name), level))
    return features


# One full pass over the dataset per quality level, in the same order as before.
uncompressed_data = _collect_peak_distances(QualityLevel.ORIGINAL, image_number)
quality_high = _collect_peak_distances(QualityLevel.HIGH, image_number)
quality_medium = _collect_peak_distances(QualityLevel.MEDIUM, image_number)
quality_low = _collect_peak_distances(QualityLevel.WORST, image_number)

# Evaluation scaffold.
# NOTE(review): this loop only selects a feature value per test; no prediction
# is made and `correct_predictions` is never incremented, so the reported
# accuracy is always 0. `quality_map`, `quality_index` and `img_info` are
# assigned but not used in this cell.
quality_map = {1: "High Quality", 2: "Medium Quality", 3: "Low Quality"}

correct_predictions = 0
number_of_tests = 400
testing_cutoff = 0  # offset added to every list index; 0 means sampling starts at the first entry

# Test ids are split into consecutive buckets of 100:
# 0-99 original, 100-199 slightly compressed, 200-299 decently compressed,
# 300 and above highly compressed.
_feature_buckets = (uncompressed_data, quality_high, quality_medium, quality_low)

# Random permutation of the test ids 0 .. number_of_tests - 1.
index = np.random.choice(np.arange(0, number_of_tests), size=number_of_tests, replace=False)
for i, _test_id in enumerate(index):
    print(f"Test number: {i}")

    quality_index = np.random.randint(0, 4)

    # Everything from 300 upward falls into the last bucket.
    _bucket = min(_test_id // 100, 3)
    img_info = _feature_buckets[_bucket][_test_id - 100 * _bucket + testing_cutoff]

    print("\n")
print(f"Testing accuracy: {correct_predictions / number_of_tests}")

print(uncompressed_data)
print(quality_high)
print(quality_medium)
print(quality_low)

Test number: 0


Test number: 1


Test number: 2


Test number: 3


Test number: 4


Test number: 5


Test number: 6


Test number: 7


Test number: 8


Test number: 9


Test number: 10


Test number: 11


Test number: 12


Test number: 13


Test number: 14


Test number: 15


Test number: 16


Test number: 17


Test number: 18


Test number: 19


Test number: 20


Test number: 21


Test number: 22


Test number: 23


Test number: 24


Test number: 25


Test number: 26


Test number: 27


Test number: 28


Test number: 29


Test number: 30


Test number: 31


Test number: 32


Test number: 33


Test number: 34


Test number: 35


Test number: 36


Test number: 37


Test number: 38


Test number: 39


Test number: 40


Test number: 41


Test number: 42


Test number: 43


Test number: 44


Test number: 45


Test number: 46


Test number: 47


Test number: 48


Test number: 49


Test number: 50


Test number: 51


Test number: 52


Test number: 53


Test number: 54


Test number: 55


Te