# Method 2 LBP_RF Train
---

## Table of Contents

1. Library
2. Config
3. Helpers
   - `print_header()` - Formatted console output
   - `save_model_pickle()` - Save trained model
   - `save_training_info()` - Save training metadata
   - `load_image()` - Load an image from disk and convert it from BGR to RGB
   - `load_yolo_label()` - Parse YOLO format labels
   - `load_dataset_split()` - Load the images and YOLO labels of one dataset split (e.g. `train`)
   - `prepare_classification_data()` - Extract ROIs and labels
   - `extract_lbp_features()` - Extract LBP histogram features
   - `extract_lbp_features_batch()` - Batch LBP feature extraction
4. Train LBP & RF
   - `train_lbp_rf()` - Main training function
5. Running Function

# Library

In [4]:
import json
import pickle
import random
import time
from pathlib import Path

import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from skimage import feature

# Config

In [5]:
# Local paths and run-wide settings.
# Identifier of this method (NOTE(review): not referenced by the code visible
# in this notebook -- presumably kept for logging/consistency with other methods).
method = "LBP_RF"
# Project root: two directory levels above the current working directory,
# so the result depends on where the notebook is launched from.
root = Path.cwd().resolve().parent.parent
# Dataset root; load_dataset_split() expects <data_dir>/<split>/images and
# <data_dir>/<split>/labels underneath it.
data_dir = root / "data"
# Output directory for the pickled classifier and training_info.json.
model_dir = root / "notebooks" / "LBP_RF" / "models"
# Created eagerly at cell execution time so later saves cannot fail on a
# missing directory.
model_dir.mkdir(parents=True, exist_ok=True)
# Default fraction of images to load; load_dataset_split() only subsamples
# when the value is < 1.0, so 1 means "use every image".
sample_fraction = 1
# Default seed used for the image subsampling RNG.
random_seed=42


# Helpers

In [None]:

# Print a formatted header for console output
def print_header(title, width=60):
    """Print ``title`` framed by two rules of ``=`` characters.

    Args:
        title: Text printed between the two rules.
        width: Number of ``=`` characters in each rule (default 60).
    """
    rule = "=" * width
    # One print call, three lines: rule, title, rule.
    print(rule, title, rule, sep="\n")

# --------------------------------------------------------------------------
# Save a given model object to disk using pickle
def save_model_pickle(model_data, model_path):
    """Serialize ``model_data`` to ``model_path`` with pickle.

    Args:
        model_data: Any picklable object (here: a dict bundling the model and
            its parameters).
        model_path: ``pathlib.Path`` of the output file; missing parent
            directories are created.
    """
    target_dir = model_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with model_path.open("wb") as sink:
        pickle.dump(model_data, sink)

# --------------------------------------------------------------------------
# Save information about training time to JSON in the model directory
def save_training_info(training_time, model_dir):
    """Write the training duration to ``<model_dir>/training_info.json``.

    Args:
        training_time: Elapsed training time in seconds.
        model_dir: ``pathlib.Path`` of the directory receiving the JSON file;
            created if it does not exist.

    The file holds the duration both in seconds and in hours.
    """
    seconds = float(training_time)
    hours = float(training_time / 3600)
    payload = {
        "training_time_seconds": seconds,
        "training_time_hours": hours,
    }

    info_path = model_dir / "training_info.json"
    info_path.parent.mkdir(parents=True, exist_ok=True)
    with info_path.open("w") as sink:
        sink.write(json.dumps(payload, indent=2))

# --------------------------------------------------------------------------
# Load an image from disk and convert BGR to RGB
def load_image(image_path):
    """Read an image with OpenCV and return it in RGB channel order.

    Args:
        image_path: Path of the image file (converted to ``str`` for OpenCV).

    Returns:
        The decoded image after a BGR -> RGB conversion.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path.
    """
    bgr = cv2.imread(str(image_path))
    if bgr is not None:
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    # cv2.imread signals failure with None instead of raising.
    raise FileNotFoundError(f"Unable to read image: {image_path}")

# --------------------------------------------------------------------------
# Load YOLO-formatted label file and convert normalized bbox to pixel coordinates
def load_yolo_label(label_path, w, h):
    """Parse a YOLO label file into pixel-space bounding boxes.

    Each usable line is ``class xc yc bw bh`` with the box centre and size
    normalized to the image dimensions; fields beyond the fifth are ignored
    and lines with fewer than five fields are skipped.

    Args:
        label_path: ``pathlib.Path`` of the label file.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        A list of ``{"class": int, "bbox": [x1, y1, x2, y2]}`` dicts with
        integer (truncated) pixel corners; empty if the file does not exist.
    """
    if not label_path.exists():
        return []

    boxes = []
    with open(label_path, "r") as fh:
        for raw in fh:
            fields = raw.strip().split()
            if len(fields) >= 5:
                cls = int(fields[0])
                cx, cy, box_w, box_h = (float(v) for v in fields[1:5])
                half_w = box_w / 2
                half_h = box_h / 2
                # Centre/size -> corner coordinates, scaled to pixels.
                left = int((cx - half_w) * w)
                top = int((cy - half_h) * h)
                right = int((cx + half_w) * w)
                bottom = int((cy + half_h) * h)
                boxes.append({"class": cls, "bbox": [left, top, right, bottom]})
    return boxes

# --------------------------------------------------------------------------
# Load dataset split and return list of samples (images & detections)
def load_dataset_split(base_dir, split="train", sample_fraction=sample_fraction, random_seed=random_seed):
    """Load every labeled image of one dataset split into memory.

    Args:
        base_dir: ``pathlib.Path`` containing ``<split>/images`` and
            ``<split>/labels``.
        split: Name of the split sub-directory (default ``"train"``).
        sample_fraction: When below 1.0, only this fraction of the image
            files (at least one) is loaded, chosen at random.
        random_seed: Seed of the RNG used for that subsampling.

    Returns:
        A list of dicts with keys ``image``, ``image_path``, ``detections``,
        ``labels`` and ``image_id``. Images without any detection are left out.

    Raises:
        FileNotFoundError: If the images or labels directory is missing.
        RuntimeError: If no image with at least one detection was found.
    """
    split_dir = base_dir / split
    images_dir = split_dir / "images"
    labels_dir = split_dir / "labels"
    if not images_dir.exists():
        raise FileNotFoundError(f"Images directory not found: {images_dir.resolve()}")
    if not labels_dir.exists():
        raise FileNotFoundError(f"Labels directory not found: {labels_dir.resolve()}")

    # Sort first so the seeded subsample does not depend on directory order.
    image_files = sorted([*images_dir.glob("*.jpg"), *images_dir.glob("*.png")])
    if sample_fraction < 1.0:
        keep = max(1, int(len(image_files) * sample_fraction))
        image_files = random.Random(random_seed).sample(image_files, keep)

    samples = []
    for img_path in image_files:
        image = load_image(img_path)
        height, width = image.shape[:2]
        label_file = labels_dir / f"{img_path.stem}.txt"
        detections = load_yolo_label(label_file, width, height)
        if detections:
            samples.append({
                "image": image,
                "image_path": img_path,
                "detections": detections,
                "labels": [d["class"] for d in detections],
                "image_id": img_path.stem,
            })

    if samples:
        return samples
    raise RuntimeError(f"No labeled samples found in {images_dir.resolve()} with labels in {labels_dir.resolve()}")

# --------------------------------------------------------------------------
# Extract cropped ROIs and corresponding labels from labeled samples
def prepare_classification_data(samples):
    """Crop one ROI per detection and collect the matching class labels.

    Bounding boxes are clamped to the image bounds before cropping. Without
    that, a negative coordinate (a box that sticks out past the left/top
    edge) would be interpreted by NumPy as an index counted from the end of
    the axis, producing an empty or wrong crop.

    Args:
        samples: Iterable of dicts with an ``"image"`` array (height and
            width as the first two axes) and a ``"detections"`` list whose
            items carry ``"bbox"`` as ``[x1, y1, x2, y2]`` in pixels and a
            ``"class"`` label.

    Returns:
        ``(images, labels)``: a list of ROI arrays (views into the source
        images) and a NumPy array of their class labels, in matching order.
        Boxes with no area inside the image are skipped.
    """
    images = []
    labels = []
    for sample in samples:
        image = sample["image"]
        img_h, img_w = image.shape[:2]
        for det in sample["detections"]:
            x1, y1, x2, y2 = det["bbox"]
            # Clamp to the image so slicing never sees a negative index.
            x1, x2 = max(0, x1), min(img_w, x2)
            y1, y2 = max(0, y1), min(img_h, y2)
            if x2 <= x1 or y2 <= y1:
                continue  # box lies outside the image or is degenerate
            roi = image[y1:y2, x1:x2]
            if roi.size == 0:
                continue
            images.append(roi)
            labels.append(det["class"])
    return images, np.array(labels)

# --------------------------------------------------------------------------
# Extract LBP feature vector (histogram) from a single image (ROI)
def extract_lbp_features(image, radius=3, n_points=24, method='uniform'):
    """Compute a normalized LBP histogram for a single image (ROI).

    Args:
        image: Image array. A 3-dimensional array is converted to grayscale
            with ``COLOR_RGB2GRAY``; any other array is used as given.
        radius: Radius passed to ``local_binary_pattern``.
        n_points: Number of sampling points passed to ``local_binary_pattern``.
        method: LBP variant passed to ``local_binary_pattern``.

    Returns:
        A float histogram with ``n_points + 2`` bins over ``[0, n_points + 2)``
        that sums to (approximately) 1.

    NOTE(review): the ``n_points + 2`` bin count presumably targets the
    ``'uniform'`` variant; codes outside that range would fall outside the
    histogram -- confirm before using another ``method``.
    """
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    codes = feature.local_binary_pattern(gray, n_points, radius, method=method)

    n_bins = n_points + 2
    counts, _ = np.histogram(codes.ravel(), bins=n_bins, range=(0, n_bins))
    counts = counts.astype(float)
    # The epsilon keeps the division defined for an all-zero histogram.
    return counts / (counts.sum() + 1e-8)

# --------------------------------------------------------------------------
# Batch process a list of images to extract LBP features for each
def extract_lbp_features_batch(images, **kwargs):
    """Extract LBP features for every image and stack them into one array.

    Args:
        images: Iterable of image arrays (ROIs).
        **kwargs: Forwarded unchanged to ``extract_lbp_features``.

    Returns:
        A NumPy array built from the per-image feature vectors, in input order.
    """
    vectors = []
    for img in images:
        vectors.append(extract_lbp_features(img, **kwargs))
    return np.array(vectors)



# Train LBP & RF

In [None]:
def train_lbp_rf(sample_fraction=sample_fraction, random_seed=random_seed):
    """Train the LBP + Random Forest classifier and save it under ``model_dir``.

    Pipeline: load the ``train`` split from ``data_dir``, crop one ROI per
    labeled box, extract an LBP histogram per ROI, fit a
    ``RandomForestClassifier``, then write ``classifier.pkl`` and
    ``training_info.json`` into ``model_dir``.

    Args:
        sample_fraction: Fraction of the training images to load (values
            below 1.0 trigger random subsampling in ``load_dataset_split``).
        random_seed: Seed used for the image subsampling and for the
            forest's ``random_state``, so one value controls the whole run.

    Raises:
        RuntimeError: If no ROI could be extracted from the loaded samples.
    """
    print_header("Training Method 2: LBP + Random Forest")
    rf_params = {
        "n_estimators": 200,
        "max_depth": 30,
        "class_weight": "balanced",
        # Use the caller's seed instead of a hard-coded one; the default
        # (42) keeps the previous behaviour.
        "random_state": random_seed,
        "n_jobs": -1,
    }
    lbp_params = {
        "radius": 1,
        "n_points": 8,
        "method": "uniform",
    }

    print("\nLoading training data...")
    train_samples = load_dataset_split(data_dir, "train", sample_fraction, random_seed)
    # round() rather than int(): int() truncates, so e.g. 0.29 * 100
    # (28.999...) would be reported as 28%.
    print(f"Loaded {len(train_samples)} training samples ({round(sample_fraction * 100)}%)")

    images, labels = prepare_classification_data(train_samples)
    print(f"Training samples: {len(images)}")
    if not images:
        # Fail with a clear message instead of an opaque error from fit().
        raise RuntimeError("No ROIs could be extracted from the training samples")

    print("\nExtracting LBP features...")
    features = extract_lbp_features_batch(images, **lbp_params)

    print("Training Random Forest classifier...")
    # Only the fit itself is timed; data loading and feature extraction
    # are not part of the reported training time.
    start = time.time()
    model = RandomForestClassifier(**rf_params)
    model.fit(features, labels)
    training_time = time.time() - start

    print(f"Training complete! Classes: {model.classes_}")
    print(f"Training time: {training_time:.2f} seconds ({training_time/3600:.4f} hours)")

    # Persist the LBP and RF parameters alongside the model so inference
    # can reproduce the exact feature extraction.
    model_path = model_dir / "classifier.pkl"
    save_model_pickle(
        {
            "model": model,
            "classes": model.classes_,
            "lbp_params": lbp_params,
            "rf_params": rf_params,
        },
        model_path,
    )
    print(f"\nModel saved to: {model_path}")

    save_training_info(training_time, model_dir)

# Running Function

In [8]:
# Run the full training pipeline with the default sample fraction and seed,
# but only when this code is executed as the entry point (not on import).
if __name__ == "__main__":
    train_lbp_rf()

Training Method 2: LBP + Random Forest

Loading training data...
Loaded 11499 training samples (100%)
Training samples: 15282

Extracting LBP features...
Training Random Forest classifier...
Training complete! Classes: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Training time: 1.62 seconds (0.0004 hours)

Model saved to: G:\My Drive\02_Areas\04_Coding\04_Courses\Master\unsw\COMP9517\group_project\notebooks\LBP_RF\models\classifier.pkl
