# In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import zipfile
import io
import os
# No 'google.colab' import needed for local Jupyter Notebooks

def process_image(img_raw, eps=5, min_samples=38):
    """
    Segments a single BGR image with DBSCAN and blacks out its largest cluster.

    Every pixel of the blurred grayscale image becomes a feature vector
    (row, col, intensity). DBSCAN clusters those vectors, and the pixels of the
    most populous non-noise cluster are painted black on an all-white canvas
    that has the same shape and dtype as the input.

    Args:
        img_raw: Image array passed to cv2.cvtColor with COLOR_BGR2GRAY
            (i.e. a BGR image such as cv2.imdecode returns), or None.
        eps: DBSCAN neighbourhood radius, measured in the combined
            (row, col, intensity) feature space. May need tuning per image set.
        min_samples: DBSCAN `min_samples` value (neighbourhood size required
            for a core point). May need tuning per image set.

    Returns:
        A tuple (img_raw, gray, output_img): the untouched input, its grayscale
        conversion, and the white canvas with the largest cluster in black.
        Returns (None, None, None) when img_raw is None.
    """
    if img_raw is None:
        print("Error: Input image is None.")
        return None, None, None

    # 1. Convert to grayscale and smooth to reduce pixel-level noise
    gray = cv2.cvtColor(img_raw, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # 2. Build the (row, col, intensity) feature matrix in one vectorized step.
    #    np.indices + ravel walks the image in row-major order, so row k of
    #    `features` describes pixel (k // cols, k % cols) and the labels can be
    #    reshaped straight back to the image grid below.
    rows, cols = blurred.shape
    row_idx, col_idx = np.indices((rows, cols))
    features = np.column_stack(
        (row_idx.ravel(), col_idx.ravel(), blurred.ravel())
    )

    # 3. Cluster the pixels; label -1 marks points DBSCAN considers noise
    clusters = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(features)
    clustered_image = clusters.reshape((rows, cols))

    # 4. Start from a white canvas matching the input's shape and dtype
    output_img = np.full_like(img_raw, 255)

    # 5. Paint the largest non-noise cluster black
    unique_labels, counts = np.unique(clustered_image, return_counts=True)
    is_cluster = unique_labels != -1
    non_noise_labels = unique_labels[is_cluster]

    if len(non_noise_labels) > 0:
        largest_cluster_label = non_noise_labels[np.argmax(counts[is_cluster])]
        output_img[clustered_image == largest_cluster_label] = [0, 0, 0]
    else:
        print("No significant clusters found other than noise. Output image will be mostly white.")

    return img_raw, gray, output_img

def calculate_simple_accuracy(enhanced_img, original_gray_img):
    """
    Returns the fraction of pure-black pixels in the enhanced image.

    This is only a proxy for segmentation quality (no ground-truth masks are
    available): a higher value simply means a larger black (segmented) area.

    Args:
        enhanced_img: Image array of shape (H, W) or (H, W, C). A pixel counts
            as black when every one of its channels equals 0.
        original_gray_img: Unused; kept so existing callers keep working.

    Returns:
        A float in [0, 1]: black pixels divided by H * W. An image with no
        pixels yields 0.0 instead of a nan from dividing by zero.
    """
    height, width = enhanced_img.shape[:2]
    total_pixels = height * width
    if total_pixels == 0:
        return 0.0

    if enhanced_img.ndim == 2:
        # Single-channel image: compare intensities directly
        black_mask = enhanced_img == 0
    else:
        # Multi-channel image: black only if all channels are zero
        black_mask = np.all(enhanced_img == 0, axis=-1)

    return np.count_nonzero(black_mask) / total_pixels

def plot_histogram_with_centroids(image, title, ax):
    """
    Draws an image's 256-bin intensity histogram on `ax` and marks its mean.

    A 3-dimensional input is treated as BGR and converted to grayscale first;
    any other input is used as-is. The "centroid" is the mean pixel intensity,
    drawn as a dashed red vertical line with a text label next to it.
    """
    # Only colour (3-D) inputs need converting; grayscale passes straight through
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if image.ndim == 3 else image

    # Histogram curve and axis decoration
    hist = cv2.calcHist([gray_image], [0], None, [256], [0, 256])
    ax.plot(hist, color='black')
    ax.set_title(title)
    ax.set_xlabel('Pixel Intensity')
    ax.set_ylabel('Frequency')

    # Nothing to average for an empty image, so skip the centroid marker
    pixel_values = gray_image.flatten()
    if len(pixel_values) == 0:
        return

    centroid = np.mean(pixel_values)
    ax.axvline(centroid, color='red', linestyle='dashed', linewidth=1)
    # Place the label slightly right of the line, near the top of the plot
    label_y = ax.get_ylim()[1] * 0.9
    ax.text(centroid + 5, label_y, f'Centroid: {centroid:.2f}', color='red')


# --- Main execution block for Jupyter Notebook (Local) ---
#
# Asks for a zip archive, runs process_image on up to MAX_IMAGES images inside
# it, and fills three parallel lists (enhanced_images_data, image_names,
# accuracy_scores) that the display section reads. Each enhanced image is also
# written to `output_dir` as a PNG.

# 1. Get Zip File Path from User Input
zip_file_path = input("Please enter the full path to your zip file (e.g., C:\\Users\\YourName\\images.zip): ").strip()
# Dragging a file into the prompt can wrap the path in a pair of matching
# quotes; strip one such pair.
for quote in ("'", '"'):
    if zip_file_path.startswith(quote) and zip_file_path.endswith(quote):
        zip_file_path = zip_file_path[1:-1]
        break


enhanced_images_data = []  # (original BGR, enhanced, grayscale) per image
image_names = []           # archive member name per processed image
accuracy_scores = []       # segmentation ratio per processed image

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
MAX_IMAGES = 5

# Create a directory to save enhanced images
output_dir = "enhanced_images_output"
os.makedirs(output_dir, exist_ok=True)

try:
    if not os.path.exists(zip_file_path):
        raise FileNotFoundError(f"Zip file not found at: {zip_file_path}")

    with zipfile.ZipFile(zip_file_path, 'r') as zf:
        # Keep members with an image extension, but drop the metadata entries
        # macOS adds to archives ('__MACOSX/...' and '._*' files): they carry
        # image extensions without being images and would otherwise use up
        # slots in the MAX_IMAGES limit.
        image_files = [
            name for name in zf.namelist()
            if name.lower().endswith(IMAGE_EXTENSIONS)
            and not name.startswith('__MACOSX/')
            and not os.path.basename(name).startswith('._')
        ]

        if not image_files:
            print("No image files found in the zip archive.")
        else:
            print(f"Found {len(image_files)} image(s) in the zip file.")

            # Process up to MAX_IMAGES images
            images_to_process = image_files[:MAX_IMAGES]
            if len(image_files) > MAX_IMAGES:
                print(f"Note: Processing only the first {len(images_to_process)} images found in the zip file.")

            for img_name in images_to_process:
                print(f"\nProcessing {img_name}...")
                img_array = np.frombuffer(zf.read(img_name), np.uint8)
                # A zero-byte member is treated as undecodable rather than
                # handed to OpenCV, so one bad entry cannot abort the batch.
                img_raw = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None

                if img_raw is None:
                    print(f"Warning: Could not decode image {img_name}. Skipping.")
                    continue

                original_img, gray_img, enhanced_img = process_image(img_raw)
                if enhanced_img is None:
                    continue

                enhanced_images_data.append((original_img, enhanced_img, gray_img))
                image_names.append(img_name)

                # Calculate and store accuracy
                accuracy = calculate_simple_accuracy(enhanced_img, gray_img)
                accuracy_scores.append(accuracy)

                # Save the enhanced image to the output directory.
                # cv2.imwrite signals failure through its return value, so
                # only report success when it actually returned True.
                base_name = os.path.splitext(os.path.basename(img_name))[0]
                output_path = os.path.join(output_dir, f"enhanced_{base_name}.png")
                if cv2.imwrite(output_path, enhanced_img):
                    print(f"Enhanced image saved to {output_path}")
                else:
                    print(f"Warning: Could not save enhanced image to {output_path}.")

except FileNotFoundError as e:
    print(f"Error: {e}")
except zipfile.BadZipFile:
    print(f"Error: '{zip_file_path}' is not a valid zip file. Please check your file.")
except Exception as e:
    # Top-level boundary of the notebook cell: report and fall through so the
    # display section can still show whatever was processed before the error.
    print(f"An unexpected error occurred: {e}")


# --- Display Results in Jupyter Notebook ---
#
# Renders three figures from the lists filled by the processing section:
# side-by-side original/enhanced images, a bar chart of segmentation ratios,
# and per-image intensity histograms with their mean ("centroid") marked.

if enhanced_images_data:
    num_images = len(enhanced_images_data)
    # Basenames keep axis labels and histogram titles short
    short_names = [os.path.basename(name) for name in image_names]

    # 1. Display all enhanced images (Original vs. Enhanced)
    print("\n--- Original vs. Enhanced Images ---")
    plt.figure(figsize=(15, 6 * num_images))
    for row, (name, (original, enhanced, _)) in enumerate(zip(image_names, enhanced_images_data)):
        panels = (
            # OpenCV stores BGR; matplotlib expects RGB
            (f'Original: {name}', cv2.cvtColor(original, cv2.COLOR_BGR2RGB)),
            (f'Enhanced: {name}', enhanced),
        )
        for col, (panel_title, panel_img) in enumerate(panels, start=1):
            plt.subplot(num_images, 2, 2 * row + col)
            plt.imshow(panel_img)
            plt.title(panel_title)
            plt.axis('off')
    plt.tight_layout()
    plt.show()

    # 2. Bar chart of "Accuracy" (Segmentation Ratio)
    print("\n--- Segmentation Ratio Bar Chart ---")
    plt.figure(figsize=(10, 6))
    bar_positions = np.arange(len(image_names))
    plt.bar(bar_positions, accuracy_scores, color='skyblue')
    plt.xlabel('Image Name')
    plt.ylabel('Segmentation Ratio (Proxy for Accuracy)')
    plt.title('Segmentation Ratio for Each Processed Image')
    plt.xticks(bar_positions, short_names, rotation=45, ha='right')
    plt.ylim(0, 1)  # The ratio is a fraction of the image area
    plt.tight_layout()
    plt.show()

    # 3. Histograms with Centroids for Original Grayscale and Enhanced Images
    print("\n--- Histograms with Centroids ---")
    # squeeze=False keeps the axes grid 2-D even when there is a single image
    fig_histograms, axes_hist = plt.subplots(
        num_images, 2, figsize=(15, 5 * num_images), squeeze=False
    )
    for (ax_original, ax_enhanced), short_name, (_, enhanced, gray) in zip(
        axes_hist, short_names, enhanced_images_data
    ):
        # Left column: histogram of the original grayscale image
        plot_histogram_with_centroids(gray, f'Original Histogram: {short_name}', ax_original)

        # Right column: histogram of the enhanced image, reduced to grayscale first
        enhanced_gray = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)
        plot_histogram_with_centroids(enhanced_gray, f'Enhanced Histogram: {short_name}', ax_enhanced)

    plt.tight_layout()
    plt.show()
else:
    print("No images were successfully processed to display results.")