In [1]:
# Professional-grade CV + ML pipeline

"""
This code will include:

Feature extraction (advanced features)

Data augmentation

Train/test split

Feature scaling

PCA (dimensionality reduction)

Cross-validation

Hyperparameter tuning

Evaluation (accuracy, confusion matrix, report)

Pipeline usage

Model saving/loading

Real-time inference on webcam
    
"""

'\nThis code will include:\n\nFeature extraction (advanced features)\n\nData augmentation\n\nTrain/test split\n\nFeature scaling\n\nPCA (dimensionality reduction)\n\nCross-validation\n\nHyperparameter tuning\n\nEvaluation (accuracy, confusion matrix, report)\n\nPipeline usage\n\nModel saving/loading\n\nReal-time inference on webcam\n\n'

In [2]:
import cv2
import numpy as np
import os
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [3]:
# -----------------------------
# Feature Extraction Function
# -----------------------------
def extract_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Noise removal
    blur = cv2.GaussianBlur(gray, (9,9), 0)

    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours) == 0:
        return None

    cnt = max(contours, key=cv2.contourArea)

    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)

    x,y,w,h = cv2.boundingRect(cnt)
    aspect_ratio = w / float(h)

    circularity = 4*np.pi*area/(perimeter*perimeter) if perimeter!=0 else 0

    # Solidity
    hull = cv2.convexHull(cnt)
    hull_area = cv2.contourArea(hull)
    solidity = area / hull_area if hull_area!=0 else 0

    # Extent
    rect_area = w*h
    extent = area / rect_area if rect_area!=0 else 0

    return [area, perimeter, aspect_ratio, circularity, solidity, extent]



In [4]:
# -----------------------------
# Data Augmentation
# -----------------------------
def augment_image(img):
    augmented = []
    augmented.append(img)
    augmented.append(cv2.flip(img, 1))
    augmented.append(cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE))
    return augmented



In [5]:
# -----------------------------
# Load Dataset
# -----------------------------
def load_dataset(dataset_path="dataset"):
    X = []
    y = []

    labels = ["circles", "squares"]

    for label_index, label_name in enumerate(labels):
        folder = os.path.join(dataset_path, label_name)

        for file in os.listdir(folder):
            path = os.path.join(folder, file)
            img = cv2.imread(path)

            if img is None:
                continue

            # Apply augmentation
            for aug_img in augment_image(img):
                features = extract_features(aug_img)
                if features is not None:
                    X.append(features)
                    y.append(label_index)

    return np.array(X), np.array(y)



In [6]:
# -----------------------------
# Load Data
# -----------------------------
X, y = load_dataset("dataset")

print("Dataset size:", len(X))

Dataset size: 387


In [7]:
# -----------------------------
# Train/Test Split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# -----------------------------
# Build Pipeline
# -----------------------------
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=4)),
    ('classifier', RandomForestClassifier())
])


In [9]:
# -----------------------------
# Hyperparameter Tuning
# -----------------------------
param_grid = {
    'classifier__n_estimators': [50, 100],
    'classifier__max_depth': [None, 10]
}

grid = GridSearchCV(pipeline, param_grid, cv=3)
grid.fit(X_train, y_train)

model = grid.best_estimator_

print("Best Parameters:", grid.best_params_)


Best Parameters: {'classifier__max_depth': None, 'classifier__n_estimators': 100}


In [10]:
# -----------------------------
# Cross Validation
# -----------------------------
scores = cross_val_score(model, X_train, y_train, cv=5)
print("Cross-validation accuracy:", scores.mean())

Cross-validation accuracy: 0.8510840824960338


In [11]:
# -----------------------------
# Evaluation
# -----------------------------
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8461538461538461
Confusion Matrix:
 [[40  9]
 [ 3 26]]
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.82      0.87        49
           1       0.74      0.90      0.81        29

    accuracy                           0.85        78
   macro avg       0.84      0.86      0.84        78
weighted avg       0.86      0.85      0.85        78



In [12]:
# -----------------------------
# Save Model
# -----------------------------
joblib.dump(model, "shape_model.pkl")
print("Model saved.")

Model saved.


In [13]:
# -----------------------------
# Real-time Webcam Prediction
# -----------------------------
model = joblib.load("shape_model.pkl")

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_small = cv2.resize(frame, (320,240))

    features = extract_features(frame_small)

    if features is not None:
        prediction = model.predict([features])[0]

        label = "Circle" if prediction == 0 else "Square"

        cv2.putText(frame, label, (20,40),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0,255,0), 2)

    cv2.imshow("Real-time Classification", frame)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()

In [15]:
# -----------------------------
# Real-time Webcam Prediction
# -----------------------------
model = joblib.load("shape_model.pkl")

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # --- DETECTION LOGIC (Finding the object) ---
    # We use the same preprocessing as your 'extract_features' function
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Blur helps the webcam handle flickering lights
    blurred = cv2.GaussianBlur(gray, (25, 25), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for cnt in contours:
        # Ignore tiny noise/specks
        if cv2.contourArea(cnt) < 1000:
            continue
            
        x, y, w, h = cv2.boundingRect(cnt)
    
        frame_small = cv2.resize(frame, (320,240))

        features = extract_features(frame_small)

        if features is not None:
            prediction = model.predict([features])

            # Mapping numerical labels back to text
            if prediction[0] == 0:
                label = "Circle"
                color = (0, 255, 0) # Green for circle
            else:
                label = "Square"
                color = (255, 0, 0) # Blue for square

        # Draw the bounding box
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
    
        # Draw the label text
        cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)


    cv2.imshow("Real-time Classification", frame)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()

In [None]:
# Data Loading from an compressed numpy dataset(.npz)
import numpy as np
import cv2
import joblib
import time

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# -----------------------------
# 1. Load Dataset
# -----------------------------
container = np.load('shapes.npz')

X_train = container['X_train']
y_train = container['y_train']
X_test  = container['X_test']
y_test  = container['y_test']

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


# -----------------------------
# 2. Flatten Images
# -----------------------------
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat  = X_test.reshape(len(X_test), -1)


# -----------------------------
# 3. Build ML Pipeline
# -----------------------------
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=50)),
    ('classifier', RandomForestClassifier(n_estimators=100))
])


# -----------------------------
# 4. Train Model
# -----------------------------
print("Training model...")
pipeline.fit(X_train_flat, y_train)


# -----------------------------
# 5. Evaluate Model
# -----------------------------
y_pred = pipeline.predict(X_test_flat)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


# -----------------------------
# 6. Save Model
# -----------------------------
joblib.dump(pipeline, "shape_model.pkl")
print("Model saved.")


# -----------------------------
# 7. Real-time Webcam Prediction
# -----------------------------
model = joblib.load("shape_model.pkl")

# Determine image size expected by model
img_h, img_w = X_train.shape[1], X_train.shape[2]

cap = cv2.VideoCapture(0)

prev_time = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Resize to match training images
    resized = cv2.resize(gray, (img_w, img_h))

    # Flatten
    sample = resized.reshape(1, -1)

    # Predict
    prediction = model.predict(sample)[0]

    label = f"Prediction: {prediction}"

    # FPS calculation
    current_time = time.time()
    fps = 1/(current_time-prev_time) if prev_time!=0 else 0
    prev_time = current_time

    # Display
    cv2.putText(frame, label, (20,40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

    cv2.putText(frame, f"FPS: {int(fps)}", (20,80),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,0), 2)

    cv2.imshow("Webcam Classification", frame)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()


Train shape: (7500, 64, 64)
Test shape: (2500, 64, 64)
Training model...
Accuracy: 0.9956
Confusion Matrix:
 [[1243    7]
 [   4 1246]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00      1250
           1       0.99      1.00      1.00      1250

    accuracy                           1.00      2500
   macro avg       1.00      1.00      1.00      2500
weighted avg       1.00      1.00      1.00      2500

Model saved.
