# **Threshold-Based Classification of Degraded Images**<br/>
**Master's Degree in Data Science (A.Y. 2023/2024)**<br/>
**University of Milano - Bicocca**<br/>

Vittorio Haardt, Luca Porcelli

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset archive used
# by the next cell is read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Extraction (Degraded Validation Set)

In [None]:
!unzip "/content/drive/MyDrive/VIPM/Dataset/val_set_degraded.zip" -d val_set_degraded

# Installing packages and loading libraries

In [None]:
pip install brisque

Collecting brisque
  Downloading brisque-0.0.15-py3-none-any.whl (135 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/135.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m133.1/135.6 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting libsvm (from brisque)
  Downloading libsvm-3.23.0.4.tar.gz (170 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m170.6/170.6 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: libsvm
  Building wheel for libsvm (setup.py) ... [?25l[?25hdone
  Created wheel for libsvm: filename=libsvm-3.23.0.4-cp310-cp310-linux_x86_64.whl size=251408 sha256=632bb9d231d6908c00b7b6b7ec94baa572a346c0666a4df4cb617c7c8a7f86a8
  Stored in directory: /

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from brisque import BRISQUE
from skimage import io
from tqdm import tqdm
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
import os
import random
from PIL import Image

  def csr_to_problem_jit(l, x_val, x_ind, x_rowptr, prob_val, prob_ind, prob_rowptr, indx_start):


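# Manual Labeling of a Random Sample

Each sampled image is displayed on its own and labeled by hand with a numeric class: 1 = normal, 2 = noise, 3 = blurred, 4 = poor quality (compressed).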

In [None]:
# Path to the folder containing the images
image_folder = '/content/val_set_degraded'

# List to save the image name and class
images_and_classes = []

# Keep only image files: the folder also contains metadata.csv, which
# Image.open cannot read and which must not end up in the labeled sample.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')
image_files = [
    f for f in os.listdir(image_folder)
    if os.path.isfile(os.path.join(image_folder, f))
    and f.lower().endswith(image_extensions)
]

# Select randomly up to 100 images (random.sample raises ValueError when asked
# for more items than the population holds)
random_images = random.sample(image_files, min(100, len(image_files)))

# Loop through the selected images
for image_name in tqdm(random_images):
    # Create the full path of the image
    image_path = os.path.join(image_folder, image_name)

    # Load and display the image inline. Image.show() hands the picture to an
    # external viewer, which renders nothing inside a notebook; the context
    # manager also closes the file handle once the figure has been drawn.
    with Image.open(image_path) as img:
        plt.imshow(img, cmap='gray')  # cmap only affects single-channel images
        plt.axis('off')
        plt.show()

    # Ask for the class input
    image_class = input(f"Enter the class for the image {image_name}: ")

    # Save the image name and class in the list
    images_and_classes.append({'name': image_name, 'class': image_class})

# Print the final list
print("Final list of images and classes:")
print(images_and_classes)

In [None]:
# Build a table from the manual labels collected in the labeling cell.
# That cell stores them in `images_and_classes` with the keys 'name' and
# 'class'; the labels come from input() as strings, so convert them to numbers.
df = pd.DataFrame(images_and_classes)
df['class'] = pd.to_numeric(df['class'])

# Optimization of Image Classification

Testing thresholds to distinguish between noisy, blurry, compressed, and normal images.

In [None]:
def is_valid(image, soglia):
    """Check whether the top saturation bin of a BGR image exceeds a threshold.

    The image is converted to HSV and a 256-bin histogram of channel 1
    (saturation) is computed over the range [0, 256).

    Args:
        image: Image array in BGR channel order.
        soglia: Threshold compared against the count in the last histogram bin.

    Returns:
        The result of comparing the last histogram bin with ``soglia``; the
        callers in this notebook read the boolean out of it with ``[0]``.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    saturation_hist = cv2.calcHist([hsv_image], [1], None, [256], [0, 256])
    top_bin = saturation_hist[-1]
    return top_bin > soglia

In [None]:
# Candidate thresholds for the three cascaded checks.
li_var = [20, 25, 30, 35, 40, 50, 60, 70, 80, 90, 100]  # Laplacian variance -> blurred
obj = BRISQUE(url=False)
li_s = [5, 10, 15, 20, 25, 30, 40, 50, 60, 80, 90]  # BRISQUE score -> poor quality
soglia = [2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000]  # top saturation bin -> noise
results = []

# Manually labeled sample: `images_and_classes` holds dicts with the keys
# 'name' and 'class' (filled in by the labeling cell).
df = pd.DataFrame(images_and_classes)
df['class'] = pd.to_numeric(df['class'])

# --- Step 1: measure every image once -------------------------------------
# None of the measurements depends on the threshold combination, so they are
# taken a single time instead of once per combination (8 * 11 * 11 = 968).
# The laziness of the cascade is kept: the saturation test is only evaluated
# for images that can pass the blur check for at least one threshold, and
# BRISQUE only for images that are not flagged as noise for every `k`.
min_var = min(li_var)
features = []
for image_name in tqdm(df['name'], desc="measuring images"):
    # Join folder and file name explicitly; plain string concatenation dropped
    # the path separator and made cv2.imread return None.
    path = os.path.join("/content/val_set_degraded", str(image_name).lstrip("/"))
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")

    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    var = cv2.Laplacian(grey, cv2.CV_64F).var()

    noise_by_k = None  # {k: True if the image counts as noisy for threshold k}
    s = None           # BRISQUE score, when it can ever be needed
    if var >= min_var:
        img2 = cv2.cvtColor(io.imread(path), cv2.COLOR_RGB2BGR)
        # is_valid returns a one-element comparison result; [0] extracts the flag.
        noise_by_k = {thr: bool(is_valid(img2, soglia=thr)[0]) for thr in soglia}
        if not all(noise_by_k.values()):
            s = obj.score(img)
    features.append((var, noise_by_k, s))

# --- Step 2: sweep the threshold grid over the cached measurements --------
for k in tqdm(soglia, desc="grid search"):
    for soglia_var in li_var:
        for soglia_s in li_s:
            predictions = []
            for var, noise_by_k, s in features:
                if var < soglia_var:
                    ris = 3  # blurred
                elif noise_by_k[k]:
                    ris = 2  # noise
                elif s > soglia_s:
                    ris = 4  # poor quality
                else:
                    ris = 1  # normal
                predictions.append(ris)

            df['ris'] = predictions
            conf_matrix = confusion_matrix(df['class'], df['ris'])
            accuracy = accuracy_score(df['class'], df['ris'])
            precision_per_class = precision_score(df['class'], df['ris'], average=None)
            # Each entry: [[blur thr, noise thr, BRISQUE thr], [accuracy, per-class precision]]
            results.append([[soglia_var, k, soglia_s], [accuracy, precision_per_class]])

In [None]:
# Pick the threshold combination with the highest accuracy. Each entry of
# `results` (filled by the grid search) is [[thresholds], [accuracy, precision]],
# so x[1][0] is the accuracy.
massimo = max(results, key=lambda x: x[1][0])

In [None]:
# Load the metadata table stored alongside the degraded validation images.
# The loops below read each image's file name from its first column.
deg_lab = pd.read_csv("/content/val_set_degraded/metadata.csv")

In [None]:
# Add an empty 'stato' column; the classification loop below fills it row by
# row with the predicted class (1 normal, 2 noise, 3 blurred, 4 poor quality).
deg_lab['stato'] = None

# Classification of Degraded Images

Application of Optimized Thresholds across the Set

In [None]:
# Thresholds of the best grid-search entry, stored as [blur, noise, BRISQUE].
soglia_var = float(massimo[0][0])  # Laplacian-variance threshold (blurred)
k = float(massimo[0][1])           # top-saturation-bin threshold (noise)
soglia_s = float(massimo[0][2])    # BRISQUE-score threshold (poor quality)

for i in tqdm(range(len(deg_lab))):
    # Join folder and file name explicitly; plain string concatenation loses
    # the path separator when the stored name has no leading slash.
    path = os.path.join("/content/val_set_degraded", str(deg_lab.iloc[i, 0]).lstrip("/"))
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {path}")

    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    var = cv2.Laplacian(grey, cv2.CV_64F).var()

    # Cascade: blur check first, then noise, then BRISQUE quality.
    if var < soglia_var:
        ris = 3  # blurred
    else:
        img2 = cv2.cvtColor(io.imread(path), cv2.COLOR_RGB2BGR)
        # is_valid returns a one-element comparison result; [0] extracts the flag.
        val = is_valid(img2, soglia=k)[0]
        if val:
            ris = 2  # noise
        else:
            s = obj.score(img)
            if s > soglia_s:
                ris = 4  # poor quality
            else:
                ris = 1  # normal

    deg_lab.at[i, 'stato'] = ris

In [None]:
# Persist the labeled table once so that the cell reloading it from Drive also
# works on a fresh run. An existing file is never overwritten.
deg_lab_csv = '/content/drive/MyDrive/VIPM/Dataset/deg_lab.csv'
if not os.path.exists(deg_lab_csv):
    deg_lab.to_csv(deg_lab_csv, index=False)

In [None]:
# Reload the labeled table previously saved to Drive, so the cells below can
# run without repeating the classification pass over the whole set.
deg_lab = pd.read_csv("/content/drive/MyDrive/VIPM/Dataset/deg_lab.csv")

The degraded images are modified and saved in a folder:

* Remove noise: Application of Non-local Means Denoising, a filtering technique that exploits pixel similarity to reduce noise while preserving important details. It considers the entire image to estimate the noise distribution. Subsequent application of bilateral filtering, where both spatial distance and color distance are considered to preserve edges while reducing noise.

* Remove blur: Application of a 2D filter with the kernel_blur, a 5×5 sharpening kernel that assigns a weight of 25 to the central pixel and −1 to each of its 24 neighbours, so that the weights sum to 1 and the overall brightness of the image is preserved. Subsequent application of Non-local Means Denoising.

* Poor quality: Application of Non-local Means Denoising. Subsequent application of a 2D filter with the kernel_qual, which assigns a higher weight to the central pixel and negative weights to surrounding pixels, promoting detail enhancement and reducing artifacts introduced by JPEG compression.

In [None]:
# Convolution kernels used for restoration.
# 5x5 sharpening kernel: centre 25, the 24 neighbours -1 (weights sum to 1).
kernel_blur = np.array([[-1, -1, -1, -1, -1],
                        [-1, -1, -1, -1, -1],
                        [-1, -1, 25, -1, -1],
                        [-1, -1, -1, -1, -1],
                        [-1, -1, -1, -1, -1]])

# 3x3 sharpening kernel: centre 5, the 4-connected neighbours -1 (weights sum to 1).
kernel_qual = np.array([[0, -1, 0],
                        [-1, 5, -1],
                        [0, -1, 0]])

# Initialize BRISQUE object
bri = BRISQUE(url=False)

# Destination folder
# NOTE(review): this path ends in ".zip" but is used as a directory; confirm
# the intended location. The literal is left unchanged.
destination_folder = "/content/drive/MyDrive/VIPM/Dataset/deg_pul.zip"
# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(destination_folder, exist_ok=True)

# Loop through each row of the 'deg_lab' table
for i in tqdm(range(len(deg_lab))):
    # Join folder and file name explicitly; plain string concatenation loses
    # the path separator when the stored name has no leading slash.
    path = os.path.join("/content/val_set_degraded", str(deg_lab.iloc[i, 0]).lstrip("/"))
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {path}")
    img2 = None

    # Read the label by column name (it is written as 'stato' by the
    # classification loop) rather than by position.
    stato = deg_lab['stato'].iloc[i]

    # Build a restored candidate according to the degradation type.
    # NOTE(review): the positional arguments after `None` are passed as the
    # filter strength and the template window size; OpenCV documents the
    # template window as expected to be odd - confirm 10 is intended.
    if stato == 2:  # Remove noise
        img2 = cv2.fastNlMeansDenoising(img, None, 30, 10)
        img2 = cv2.bilateralFilter(img2, 18, 40, 40)
    elif stato == 3:  # Remove blur
        img2 = cv2.filter2D(img, -1, kernel_blur)
        img2 = cv2.fastNlMeansDenoising(img2, None, 30, 5)
    elif stato == 4:  # Poor quality
        img2 = cv2.fastNlMeansDenoising(img, None, 29, 5)
        img2 = cv2.filter2D(img2, -1, kernel_qual)
    # Any other label (e.g. 1 = normal): the image is saved unmodified.

    # Keep the candidate only if its BRISQUE score is lower than the original's.
    if img2 is not None and bri.score(img) > bri.score(img2):
        img = img2

    # Extract the filename without extension
    file_name_without_extension = os.path.splitext(os.path.basename(path))[0]

    # Save the (possibly restored) image; cv2.imwrite reports failure through
    # its return value, so check it instead of losing images silently.
    filename = os.path.join(destination_folder, f"{file_name_without_extension}.jpg")
    if not cv2.imwrite(filename, img):
        raise IOError(f"Could not write image: {filename}")


**Reference**

- Blur: [blur-detection-with-opencv](https://pyimagesearch.com/2015/09/07/blur-detection-with-opencv/)
- BRISQUE: [brisque](https://pypi.org/project/brisque/)