In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Initialize lists to hold data and labels
data = []
labels = []

# List of image files and corresponding labels
image_files = ['FRA.png', 'PEL.png', 'TAR.png']
image_labels = ['Fragment', 'Pellet', 'Tar']

# Contours whose area (in pixels) is at or below this value are skipped as
# noise. Adjust as needed.
MIN_CONTOUR_AREA = 20


def _centroid_distance_ratio(cnt):
    """Return min/max distance from the contour's centroid to its points.

    The centroid is taken from the contour's spatial moments and the
    Euclidean distance to every contour vertex is measured. The ratio is
    1.0 when all vertices are equidistant from the centroid and smaller
    when some vertices are much closer than others.

    Args:
        cnt: A contour as returned by ``cv2.findContours``.

    Returns:
        ``min_dist / max_dist`` as a float, or 0.0 when the contour has
        zero area moment (``m00 == 0``) or all distances are zero.
    """
    M = cv2.moments(cnt)
    if M['m00'] == 0:
        return 0.0

    cx = M['m10'] / M['m00']
    cy = M['m01'] / M['m00']

    # Flatten to an (N, 2) array of x, y coordinates and measure each
    # vertex against the centroid. (The previous implementation called
    # pointPolygonTest with the same centroid argument for every vertex,
    # so min and max were always identical and the ratio was constant.)
    points = cnt.reshape(-1, 2).astype(np.float64)
    distances = np.hypot(points[:, 0] - cx, points[:, 1] - cy)

    max_dist = distances.max()
    return float(distances.min() / max_dist) if max_dist != 0 else 0.0


# Loop over the images, pairing each file with its class label
for file, label in zip(image_files, image_labels):
    # Load the image; skip files that cannot be read
    image = cv2.imread(file)
    if image is None:
        print(f"Error loading image {file}")
        continue

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur to reduce noise
    blur = cv2.GaussianBlur(gray, (5, 5), 0)

    # Apply adaptive thresholding (inverted binary output)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Remove small noise with morphological operations
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, kernel, iterations=2
    )

    # Find external contours. Indexing with [-2] selects the contour list
    # whether findContours returns two values or three, which differs
    # between OpenCV major versions.
    contours = cv2.findContours(
        opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Loop over the contours
    for cnt in contours:
        # Filter by area (ignore very small particles)
        area = cv2.contourArea(cnt)
        if area <= MIN_CONTOUR_AREA:
            continue

        # Compute perimeter of the closed contour
        perimeter = cv2.arcLength(cnt, True)

        # Compactness; area is guaranteed non-zero by the filter above
        compactness = (perimeter ** 2) / area

        # Bounding rectangle: width/height ratio and fill ratio
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = float(w) / h if h != 0 else 0
        area_rect_ratio = area / (w * h) if (w * h) != 0 else 0

        # Fit ellipse if possible (fitEllipse needs at least 5 points)
        if len(cnt) >= 5:
            (center, axes, orientation) = cv2.fitEllipse(cnt)
            major_axis = max(axes)
            minor_axis = min(axes)
            ellipse_axis_ratio = minor_axis / major_axis if major_axis != 0 else 0
        else:
            ellipse_axis_ratio = 0

        # Ratio of nearest to farthest contour point from the centroid
        dist_ratio = _centroid_distance_ratio(cnt)

        # Collect features (order must match the DataFrame columns)
        features = [
            area, perimeter, compactness,
            area_rect_ratio, aspect_ratio,
            ellipse_axis_ratio, dist_ratio
        ]
        data.append(features)
        labels.append(label)

# Create a DataFrame with one row per contour and one column per feature
df = pd.DataFrame(data, columns=[
    'Area', 'Perimeter', 'Compactness',
    'Area_Rect_Ratio', 'Aspect_Ratio',
    'Ellipse_Axis_Ratio', 'Dist_Ratio'
])
df['Label'] = labels

# Fail early with a clear message. Images that fail to load are skipped
# above, so the dataset can end up empty or with a class too small for a
# stratified split; train_test_split would otherwise raise a less helpful
# ValueError further down.
if df.empty:
    raise ValueError(
        "No particle features were extracted; check that the input images "
        "load correctly and contain contours above the area threshold."
    )
label_counts = df['Label'].value_counts()
sparse_labels = label_counts[label_counts < 2]
if not sparse_labels.empty:
    raise ValueError(
        "A stratified train/test split needs at least 2 samples per class; "
        f"too few samples for: {sparse_labels.to_dict()}"
    )

# Prepare data for training: features in X, class labels in y
X = df.drop('Label', axis=1)
y = df['Label']

# Split into training and test sets (80/20, stratified by label,
# fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Train a Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on test set
y_pred = clf.predict(X_test)

# Evaluate the classifier
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Compute confusion matrix; passing clf.classes_ fixes the row/column
# order so it matches the tick labels used in the plot below
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

# Plot confusion matrix as an annotated heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm, annot=True, fmt='d',
    xticklabels=clf.classes_, yticklabels=clf.classes_,
    cmap='Blues'
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()