In [67]:
import os
import cv2
import numpy as np
from PIL import Image
from enum import Enum
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from scipy.signal import savgol_filter


In [68]:
def zero_pad(Y):
    """Zero-pad a 2-D array so both dimensions are multiples of 8.

    The input is copied into the top-left corner of a freshly allocated
    ``uint8`` array; the extra rows/columns (if any) stay zero.

    Parameters
    ----------
    Y : 2-D array-like with a ``.shape`` of length two.

    Returns
    -------
    numpy.ndarray of dtype ``uint8`` whose shape is ``Y.shape`` rounded up
    to the next multiple of 8 along each axis.
    """
    rows, cols = Y.shape
    # Ceil-divide by 8, then scale back up: the smallest multiple of 8 >= dim.
    padded_shape = tuple(-(-dim // 8) * 8 for dim in (rows, cols))

    canvas = np.zeros(padded_shape, dtype=np.uint8)
    canvas[:rows, :cols] = Y
    return canvas

def DCT(img):
    """Compute the 2-D DCT of every 8x8 tile of a 2-D image.

    Tiles are visited row by row (outer loop over the first axis, inner loop
    over the second) and stored in that order along the last axis of the
    result.

    Parameters
    ----------
    img : 2-D numpy array. Both dimensions are expected to be multiples of 8
        (e.g. the output of ``zero_pad``), since the output is sized from
        ``dim // 8`` tiles per axis.

    Returns
    -------
    numpy.ndarray of shape ``(8, 8, num_blocks)`` and dtype ``float`` where
    ``[:, :, k]`` holds the DCT coefficients of the k-th tile.
    """
    rows, cols = img.shape
    block_count = (rows // 8) * (cols // 8)
    coeffs = np.zeros((8, 8, block_count), dtype=float)

    block_idx = 0
    for top in range(0, rows, 8):
        for left in range(0, cols, 8):
            # cv2.dct requires floating-point input.
            tile = img[top:top + 8, left:left + 8].astype(np.float32)
            coeffs[:, :, block_idx] = cv2.dct(tile)
            block_idx += 1

    return coeffs


def hist(DC, N=2000, show=False):
    """Compute a density-normalised histogram and optionally plot it.

    Parameters
    ----------
    DC : array-like of values to histogram.
    N : number of equal-width bins passed to ``np.histogram``.
    show : when true, also draw the histogram with seaborn/matplotlib.

    Returns
    -------
    (hcount, bin_edges) exactly as returned by
    ``np.histogram(DC, bins=N, density=True)``.
    """
    density, edges = np.histogram(DC, bins=N, density=True)

    # Fast path: the caller only wants the numbers.
    if not show:
        return density, edges

    plt.figure(figsize=(10, 6))
    sns.histplot(
        x=edges[:-1],
        weights=density,
        bins=N,
        kde=True,
        stat="density",
    )
    plt.title("DC Coefficient Histogram")
    plt.xlabel("DC Coefficient Value")
    plt.ylabel("Density")
    plt.show()
    return density, edges

def FFT(hcount):
    """Return the centred magnitude spectrum of a mean-removed signal.

    The input is converted to a float array, its mean is subtracted (which
    zeroes the DC bin), the FFT is taken, the zero-frequency bin is shifted
    to the middle with ``fftshift`` and the magnitude is returned.

    The computation works on a float copy, so the caller's array is never
    modified and integer-typed input is accepted. (An in-place ``-=`` would
    overwrite the caller's data and fail for integer arrays.)

    Parameters
    ----------
    hcount : 1-D array-like of numeric samples (e.g. histogram densities).

    Returns
    -------
    numpy.ndarray of floats with the same length as ``hcount``.
    """
    samples = np.asarray(hcount, dtype=float)
    centered = samples - samples.mean()  # new array; input left untouched
    spectrum = np.abs(np.fft.fftshift(np.fft.fft(centered)))
    # Optional smoothing, currently disabled: savgol_filter(spectrum, 11, 2)
    return spectrum


class QualityLevel(Enum):
    """Named JPEG quality settings; each value is passed as ``quality`` when saving."""

    BEST = 100
    MEDIUM = 50
    WORST = 10
    
def compress(index=0, quality: QualityLevel = QualityLevel.BEST):
    """Round-trip one dataset image through JPEG at the given quality.

    The image ``DIV2K_train_LR_wild/{index + 1:04}x4w4.png`` is converted to
    RGB, written to ``temp_jpeg.jpg`` with ``quality.value`` as the JPEG
    quality, read back, and saved over the original path.

    WARNING: this overwrites the source file on disk, so repeated calls on
    the same index stack compression artefacts.

    Parameters
    ----------
    index : zero-based image index; the file name uses ``index + 1``.
    quality : ``QualityLevel`` member selecting the JPEG quality.

    Returns
    -------
    PIL image freshly opened from the overwritten path (the caller owns it).
    """
    file_name = f"{index + 1:04}x4w4.png"
    path = os.path.join("DIV2K_train_LR_wild", file_name)

    # Close the source handle before the same path is overwritten below.
    with Image.open(path) as original:
        rgb = original.convert('RGB')
    rgb.save('temp_jpeg.jpg', quality=quality.value)

    with Image.open("temp_jpeg.jpg") as recompressed:
        recompressed.save(path)

    return Image.open(path)


In [None]:

def get_avg_peak_distance(img):
    """Average spacing between strong peaks in the FFT of the DC-coefficient histogram.

    Steps: convert the image to YCbCr, take channel 0 (Y), zero-pad it to
    8x8 tiles, take the ``[0, 0]`` DCT coefficient of every tile, histogram
    those values, and compute the magnitude spectrum of the histogram. Bins
    exceeding ``mean + 2 * std`` of the spectrum count as peaks.

    Parameters
    ----------
    img : PIL image.

    Returns
    -------
    Mean difference between consecutive peak indices, or ``0`` when fewer
    than two peaks are found.
    """
    luma = np.array(img.convert('YCbCr'))[:, :, 0]
    dc_coeffs = DCT(zero_pad(luma))[0, 0, :]
    density, _ = hist(dc_coeffs)
    spectrum = FFT(density)

    # A bin is a peak when it exceeds the spectrum mean by two standard deviations.
    threshold = np.mean(spectrum) + 2 * np.std(spectrum)
    peak_positions = np.where(spectrum > threshold)[0]

    # A spacing needs at least two peaks; otherwise fall back to 0.
    if len(peak_positions) <= 1:
        return 0
    return np.mean(np.diff(peak_positions))

# Generating training data
cat1, cat2, cat3 = 30, 75, 45  # Number of images per category

# Each category takes a consecutive run of dataset indices, recompresses those
# images at one JPEG quality and records the average peak distance as the
# single feature. Order matters: compress() rewrites the files on disk.

# High-quality images
h = [
    get_avg_peak_distance(compress(idx, quality=QualityLevel.BEST))
    for idx in range(cat1)
]

# Medium-quality images
m = [
    get_avg_peak_distance(compress(idx, quality=QualityLevel.MEDIUM))
    for idx in range(cat1, cat1 + cat2)
]

# Low-quality images
l = [
    get_avg_peak_distance(compress(idx, quality=QualityLevel.WORST))
    for idx in range(cat1 + cat2, cat1 + cat2 + cat3)
]

# Prepare data for classifier: one integer label per sample, in feature order
high_quality_labels = len(h) * [1]
medium_quality_labels = len(m) * [2]
low_quality_labels = len(l) * [3]
quality_map = {1: "High Quality", 2: "Medium Quality", 3: "Low Quality"}

features = np.array([*h, *m, *l]).reshape(-1, 1)
labels = np.array(
    [*high_quality_labels, *medium_quality_labels, *low_quality_labels]
)

# Train-Test Split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=42,
)

# Train Naive Bayes classifier
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predictions and evaluation on the held-out split
y_pred = classifier.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Test on a new image
def predict_compression_quality(img_path):
    """Classify the compression quality of the image at ``img_path``.

    Opens the image, computes its average peak distance feature, and feeds
    it to the module-level ``classifier``. The predicted label (looked up in
    ``quality_map``) and the class probabilities are printed.

    Parameters
    ----------
    img_path : path of the image file to classify.

    Returns
    -------
    The predicted class label (a key of ``quality_map``).
    """
    image = Image.open(img_path)
    # The classifier expects a 2-D (n_samples, n_features) array.
    sample = np.array([[get_avg_peak_distance(image)]])

    label = classifier.predict(sample)[0]
    probabilities = classifier.predict_proba(sample)[0]

    print(f"Predicted Quality Level: {quality_map[label]}")
    print("Compression Probabilities (High, Medium, Low):", probabilities)

    return label

quality_levels = []

# Testing
# Held-out images start right after the training range. Each plan entry is
# (number of images, JPEG quality to apply, expected class label). The loop
# bounds and the accuracy denominator are derived from this plan, so changing
# a count cannot desynchronise them.
test_start = cat1 + cat2 + cat3
test_plan = [
    (10, QualityLevel.BEST, 1),
    (25, QualityLevel.MEDIUM, 2),
    (15, QualityLevel.WORST, 3),
]

# Phase 1: recompress every test image on disk and record its true label.
next_index = test_start
for count, level, label in test_plan:
    for i in range(next_index, next_index + count):
        # Only the on-disk side effect is needed; release the returned handle.
        compress(i, quality=level).close()
        quality_levels.append(label)
    next_index += count

# Phase 2: classify each recompressed image and tally the hits.
correct_predictions = 0
for offset, actual_level in enumerate(quality_levels):
    i = test_start + offset
    file_name = f"{i + 1:04}x4w4.png"
    path = os.path.join("DIV2K_train_LR_wild", file_name)
    print(f"File index: {i}")
    print(f"Actual Quality Level: {quality_map[actual_level]}")
    if predict_compression_quality(path) == actual_level:
        correct_predictions += 1
    print("\n")
print(f"Testing accuracy: {correct_predictions / len(quality_levels)}")




Accuracy: 0.6

Classification Report:
               precision    recall  f1-score   support

           1       0.80      0.31      0.44        13
           2       0.55      0.60      0.57        20
           3       0.61      0.92      0.73        12

    accuracy                           0.60        45
   macro avg       0.65      0.61      0.58        45
weighted avg       0.64      0.60      0.58        45

File index: 150
Actual Quality Level: High Quality
Predicted Quality Level: Medium Quality
Compression Probabilities (High, Medium, Low): [0.2008906  0.66871119 0.13039821]


File index: 151
Actual Quality Level: High Quality
Predicted Quality Level: Medium Quality
Compression Probabilities (High, Medium, Low): [0.20829073 0.67273855 0.11897072]


File index: 152
Actual Quality Level: High Quality
Predicted Quality Level: High Quality
Compression Probabilities (High, Medium, Low): [9.59389153e-01 4.06108468e-02 2.25892177e-28]


File index: 153
Actual Quality Level: High Qu