In [None]:
# Import necessary libraries
import gdown
import tarfile
import os

# Step 1: Download the .tar.gz file from Google Drive.
# The archive is several gigabytes, so the download is skipped when a copy is
# already on disk; this keeps "Restart & Run All" cheap.
file_id = '1ffbbyoPf-I3Y0iGbBahXpWqYdGd7xxQQ'
url = f'https://drive.google.com/uc?id={file_id}'
output_path = 'dataset.tar.gz'
if os.path.exists(output_path):
    print(f"Found existing archive at {output_path}; skipping download.")
else:
    downloaded_path = gdown.download(url, output_path, quiet=False)
    # Fail here with a clear message rather than later inside tarfile.open().
    if downloaded_path is None or not os.path.exists(output_path):
        raise RuntimeError(f"Download failed for {url}")

# Step 2: Extract the .tar.gz file
with tarfile.open(output_path, 'r:gz') as tar:
    if hasattr(tarfile, 'data_filter'):
        # The archive comes from the network: use the 'data' extraction filter
        # so members cannot escape the destination directory.
        tar.extractall(filter='data')
    else:
        # Older Python without extraction filters: fall back to plain extraction.
        tar.extractall()

print("Extraction complete.")

# Step 3: Check extracted files
extracted_files = os.listdir()
print(extracted_files)


Downloading...
From (original): https://drive.google.com/uc?id=1ffbbyoPf-I3Y0iGbBahXpWqYdGd7xxQQ
From (redirected): https://drive.google.com/uc?id=1ffbbyoPf-I3Y0iGbBahXpWqYdGd7xxQQ&confirm=t&uuid=849c57c9-1da6-489b-9481-ac2340ae3ee5
To: /content/dataset.tar.gz
100%|██████████| 4.40G/4.40G [01:06<00:00, 65.8MB/s]


Extraction complete.
['.config', 'dataset.tar.gz', 'dataset', 'sample_data']


In [None]:
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans

# Size of the tiles the image is cut into; index 0 is the vertical step and
# index 1 the horizontal step used by the block loop.
block_size = (16, 16)

# Where the source images are read from and where the masks are written to
input_folder = '/content/dataset/images'
output_folder = '/content/dataset/segmented'

# Create the destination directory if it is not there yet
os.makedirs(output_folder, exist_ok=True)

# Contour smoothing function
def smooth_contours(mask, cutoff=5):
    """Smooth a mask by keeping only its lowest spatial frequencies.

    The mask is binarised (any value > 0 counts as set), transformed with a
    2-D FFT, multiplied by a filled disc of radius ``cutoff`` centred on the
    shifted spectrum, and transformed back.

    Args:
        mask: 2-D array; non-zero entries are treated as foreground.
        cutoff: Radius, in frequency bins, of the disc that is kept.

    Returns:
        uint8 array with the same shape as ``mask``: 255 where the magnitude
        of the filtered signal exceeds 0.5, and 0 elsewhere.
    """
    binary = (mask > 0).astype(np.uint8)

    # Disc-shaped pass band drawn around the centre of the shifted spectrum.
    height, width = binary.shape
    centre = (width // 2, height // 2)  # cv2 expects (x, y)
    low_pass = np.zeros((height, width), np.uint8)
    cv2.circle(low_pass, centre, cutoff, 1, -1)

    # Forward transform, filter, inverse transform.
    spectrum = np.fft.fftshift(np.fft.fft2(binary))
    restored = np.fft.ifft2(np.fft.ifftshift(spectrum * low_pass))
    magnitude = np.abs(restored)

    # Re-binarise the blurred result.
    return (magnitude > 0.5).astype(np.uint8) * 255

# Feature extraction function
def extract_features(block, global_mean):
    """Build the five-element feature vector for one image block.

    Args:
        block: 2-D image tile.
        global_mean: Reference mean subtracted from the block's own mean.

    Returns:
        A list ``[variance, diff_mean, coherence, ridge_direction,
        energy_spectrum]`` where
        * variance is the variance of the block values,
        * diff_mean is the block mean minus ``global_mean``,
        * coherence and ridge_direction come from the averaged products of
          the 3x3 Sobel gradients,
        * energy_spectrum is the square root of the summed squared magnitude
          of the block's 2-D FFT.
    """
    # Horizontal and vertical derivatives of the tile.
    grad_x = cv2.Sobel(block, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(block, cv2.CV_64F, 0, 1, ksize=3)

    # Averaged gradient products.
    mean_xx = np.mean(grad_x**2)
    mean_yy = np.mean(grad_y**2)
    mean_xy = np.mean(grad_x * grad_y)

    anisotropy = mean_xx - mean_yy
    # The small constant keeps the ratio finite on flat blocks.
    coherence = np.sqrt(anisotropy**2 + 4 * mean_xy**2) / (mean_xx + mean_yy + 1e-8)
    ridge_direction = 0.5 * np.arctan2(2 * mean_xy, anisotropy)

    spectrum = np.fft.fft2(block)
    energy_spectrum = np.sqrt(np.sum(spectrum.real**2 + spectrum.imag**2))

    return [
        np.var(block),
        np.mean(block) - global_mean,
        coherence,
        ridge_direction,
        energy_spectrum,
    ]

# Process each image in the input folder
def _segment_image(image):
    """Segment one grayscale image into a 0/255 mask.

    Pipeline: Sobel magnitude -> top-hat (magnitude minus its morphological
    opening) -> per-block features -> 2-cluster K-Means -> block labels painted
    back at image resolution -> contour smoothing.

    Args:
        image: 2-D uint8 grayscale image as returned by cv2.imread.

    Returns:
        uint8 mask with the same shape as ``image``, or None when the image
        yields fewer than two blocks (K-Means needs at least two samples).
    """
    global_mean = np.mean(image)

    # Gradient magnitude of the image.
    sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)
    sobel_magnitude = cv2.magnitude(sobel_x, sobel_y)
    # Saturate to the uint8 range before casting: a bare uint8 cast wraps
    # magnitudes above 255, turning the strongest edges into weak ones.
    sobel_magnitude = np.clip(sobel_magnitude, 0, 255).astype(np.uint8)

    # Top-hat: keep what the 3x3 opening removes.
    kernel = np.ones((3, 3), np.uint8)
    opened_image = cv2.morphologyEx(sobel_magnitude, cv2.MORPH_OPEN, kernel)
    top_hat_opened_image = cv2.subtract(sobel_magnitude, opened_image)

    # One feature vector per block, in row-major block order.
    h, w = top_hat_opened_image.shape
    block_h, block_w = block_size
    row_starts = range(0, h, block_h)
    col_starts = range(0, w, block_w)
    feature_vectors = np.array([
        extract_features(top_hat_opened_image[y:y + block_h, x:x + block_w], global_mean)
        for y in row_starts
        for x in col_starts
    ])
    if len(feature_vectors) < 2:
        return None

    # n_init is pinned so the result does not depend on the installed
    # scikit-learn version's default.
    kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(feature_vectors)

    # K-Means label numbering is arbitrary, so label 1 is not reliably the
    # foreground. Pick the cluster whose centre has the larger block variance
    # (feature 0) as foreground.
    # NOTE(review): assumes the textured region is the foreground; confirm
    # against the ground-truth masks.
    foreground_label = np.argmax(kmeans.cluster_centers_[:, 0])
    block_mask = (kmeans.labels_ == foreground_label).astype(np.uint8) * 255

    # Expand the per-block decisions back to pixel resolution; the crop removes
    # the overhang of partial blocks on the bottom/right edges.
    block_mask = block_mask.reshape(len(row_starts), len(col_starts))
    clustered_image_kmeans = np.repeat(
        np.repeat(block_mask, block_h, axis=0), block_w, axis=1
    )[:h, :w]

    return smooth_contours(clustered_image_kmeans)


image_extensions = ('.tif', '.png', '.jpg', '.jpeg', '.bmp')

# Sorted so the processing order is the same on every run.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(image_extensions):
        continue

    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_paperseg.tif")

    # Load image in grayscale
    image = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Error: Could not load image {input_path}")
        continue

    smoothed_contours_image = _segment_image(image)
    if smoothed_contours_image is None:
        print(f"Error: Image {input_path} is too small to segment")
        continue

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(output_path, smoothed_contours_image):
        print(f"Error: Could not write {output_path}")

In [None]:
import cv2
import numpy as np
import os
from glob import glob

# Pre-processing and error calculation
def calculate_error_probabilities(seg_image_path, val_image_path):
    """Compare a segmentation mask with its ground-truth mask.

    Both images are read as grayscale and binarised (values above 128 become
    255, everything else 0). 255 is treated as foreground and 0 as background.

    Args:
        seg_image_path: Path of the segmented (predicted) mask.
        val_image_path: Path of the validation (ground-truth) mask.

    Returns:
        ``(Prob1, Prob2, ProbErr)`` where
        * Prob1 is the fraction of true-background pixels predicted as
          foreground (0 when the ground truth has no background),
        * Prob2 is the fraction of true-foreground pixels predicted as
          background (0 when the ground truth has no foreground),
        * ProbErr is the mean of Prob1 and Prob2.
        ``(None, None, None)`` is returned, after printing a message, when an
        image cannot be loaded or the two images differ in size.
    """
    # Load the segmented image and the validation image (ground truth)
    seg_image = cv2.imread(seg_image_path, cv2.IMREAD_GRAYSCALE)
    val_image = cv2.imread(val_image_path, cv2.IMREAD_GRAYSCALE)

    if seg_image is None or val_image is None:
        print(f"Error: Could not load images {seg_image_path} or {val_image_path}")
        return None, None, None  # Return None if there is an issue loading images

    # A pixel-wise comparison only makes sense on equally sized images; without
    # this check the `&` below raises or silently broadcasts.
    if seg_image.shape != val_image.shape:
        print(
            f"Error: Size mismatch between {seg_image_path} {seg_image.shape} "
            f"and {val_image_path} {val_image.shape}"
        )
        return None, None, None

    # Ensure the images are binary (foreground=255, background=0)
    _, seg_image = cv2.threshold(seg_image, 128, 255, cv2.THRESH_BINARY)
    _, val_image = cv2.threshold(val_image, 128, 255, cv2.THRESH_BINARY)

    true_background = val_image == 0
    true_foreground = val_image == 255

    # Number of true background (Nbrb) and true foreground (Nbrf) pixels
    Nbrb = np.sum(true_background)
    Nbrf = np.sum(true_foreground)

    # Background classification errors: true background predicted as foreground
    Nbrbe = np.sum((seg_image == 255) & true_background)

    # Foreground classification errors: true foreground predicted as background
    Nbrfe = np.sum((seg_image == 0) & true_foreground)

    # Error rates per class; an absent class contributes 0
    Prob1 = Nbrbe / Nbrb if Nbrb > 0 else 0
    Prob2 = Nbrfe / Nbrf if Nbrf > 0 else 0

    # Overall error: mean of the two per-class rates
    ProbErr = (Prob1 + Prob2) / 2

    return Prob1, Prob2, ProbErr

# Directories
segmentation_dir = '/content/dataset/segmented'
validation_dir = '/content/dataset/masks'

# File-name endings that pair a segmentation with its ground-truth mask
seg_suffix = 'paperseg.tif'
mask_suffix = 'mask.png'

# Initialize a dictionary to store the results
final_results = {}

# Per-image error probabilities
Prob1_values = []
Prob2_values = []
ProbErr_values = []

# Loop through each segmented image (sorted so every run uses the same order)
for seg_image_path in sorted(glob(os.path.join(segmentation_dir, f'*{seg_suffix}'))):
    # Construct the corresponding validation image path. Only the trailing
    # suffix is swapped, so a name that happens to contain the suffix text
    # elsewhere is left intact.
    filename = os.path.basename(seg_image_path)
    val_image_name = filename[:-len(seg_suffix)] + mask_suffix
    val_image_path = os.path.join(validation_dir, val_image_name)

    # Calculate the error probabilities
    Prob1, Prob2, ProbErr = calculate_error_probabilities(seg_image_path, val_image_path)

    # (None, None, None) marks a pair that could not be evaluated
    if Prob1 is not None:
        Prob1_values.append(Prob1)
        Prob2_values.append(Prob2)
        ProbErr_values.append(ProbErr)

if Prob1_values:
    # Calculate and store the average for each probability
    final_results = {
        'avg_Prob1': np.mean(Prob1_values),
        'avg_Prob2': np.mean(Prob2_values),
        'avg_ProbErr': np.mean(ProbErr_values)
    }

    # Print the final average results
    print("Average Results:")
    print(f"avg_Prob1: {final_results['avg_Prob1']:.4f}, avg_Prob2: {final_results['avg_Prob2']:.4f}, avg_ProbErr: {final_results['avg_ProbErr']:.4f}")

    # Calculate and print overall averages across all datasets (if you had multiple datasets)
    all_Prob1 = [final_results['avg_Prob1']]
    all_Prob2 = [final_results['avg_Prob2']]
    all_ProbErr = [final_results['avg_ProbErr']]

    print("\nOverall Average Results:")
    print(f"Overall avg_Prob1: {np.mean(all_Prob1):.4f}, Overall avg_Prob2: {np.mean(all_Prob2):.4f}, Overall avg_ProbErr: {np.mean(all_ProbErr):.4f}")
else:
    # Nothing was evaluated: report it instead of failing with a KeyError on
    # the empty results dictionary.
    print(f"No segmentation/mask pairs could be evaluated in {segmentation_dir} and {validation_dir}.")
