In [1]:
# Setup data paths and configuration
import os
from pathlib import Path

# Set up paths
base_path = Path("..")  # Go up one level from notebooks folder
data_path = base_path / "data"
test_csv_path = data_path / "test.csv"

print("FPFH + RBF SVM Pipeline for Tree Species Classification")
print("=" * 60)
print(f"Base path: {base_path.absolute()}")
print(f"Data path: {data_path.absolute()}")
print(f"Test CSV path: {test_csv_path.absolute()}")

# Check if paths exist
if not data_path.exists():
    print("❌ Data path not found!")
else:
    print("✓ Data path found")

if not test_csv_path.exists():
    print("❌ Test CSV not found!")
else:
    print("✓ Test CSV found")

FPFH + RBF SVM Pipeline for Tree Species Classification
Base path: /Users/ayoub/work/prjt/notebooks/..
Data path: /Users/ayoub/work/prjt/notebooks/../data
Test CSV path: /Users/ayoub/work/prjt/notebooks/../data/test.csv
✓ Data path found
✓ Test CSV found


In [2]:
import numpy as np
import sys
from pathlib import Path
import time
from tqdm import tqdm
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV

# Add the src directory to Python path
sys.path.append(str(Path("..").absolute()))

from src.point_cloud_processor import load_point_cloud, extract_fpfh_features

print("All libraries imported successfully!")

All libraries imported successfully!


In [3]:
def load_dataset_paths(data_path, test_csv_path):
    """Load and split dataset paths based on test.csv file"""
    try:
        test_files_df = np.genfromtxt(test_csv_path, delimiter=',', dtype=str, skip_header=1)
        test_filenames = set(test_files_df[:, 0])
    except:
        print("Warning: Could not load test.csv, using train/test folders directly")
        return load_from_folders(data_path)
    
    X_train_paths, y_train, X_test_paths, y_test = [], [], [], []

    # Check if we have train folder or species folders directly
    train_root = data_path / "train"
    if not train_root.exists():
        # Data might be directly in folders by species
        train_root = data_path

    for species_dir in train_root.iterdir():
        if not species_dir.is_dir():
            continue
        species_name = species_dir.name
        for file_path in species_dir.iterdir():
            if file_path.is_file():  # Make sure it's a file
                if file_path.name in test_filenames:
                    X_test_paths.append(file_path)
                    y_test.append(species_name)
                else:
                    X_train_paths.append(file_path)
                    y_train.append(species_name)
    
    return X_train_paths, y_train, X_test_paths, y_test

def load_from_folders(data_path):
    """Alternative method: load from existing train/test folder structure"""
    X_train_paths, y_train, X_test_paths, y_test = [], [], [], []
    
    train_root = data_path.parent / "train"  # Go back to project root
    test_root = data_path.parent / "test"
    
    if train_root.exists():
        for species_dir in train_root.iterdir():
            if species_dir.is_dir():
                species_name = species_dir.name
                for file_path in species_dir.iterdir():
                    if file_path.is_file():
                        X_train_paths.append(file_path)
                        y_train.append(species_name)
    
    if test_root.exists():
        for species_dir in test_root.iterdir():
            if species_dir.is_dir():
                species_name = species_dir.name
                for file_path in species_dir.iterdir():
                    if file_path.is_file():
                        X_test_paths.append(file_path)
                        y_test.append(species_name)
    
    return X_train_paths, y_train, X_test_paths, y_test

# Load the dataset paths
print("Loading dataset paths...")
X_train_paths, y_train, X_test_paths, y_test = load_dataset_paths(data_path, test_csv_path)

print(f"✓ Training samples: {len(X_train_paths)}")
print(f"✓ Test samples: {len(X_test_paths)}")
print(f"✓ Species found: {sorted(set(y_train))}")

if len(X_train_paths) == 0 or len(X_test_paths) == 0:
    print("❌ No data found! Please check your data structure.")

Loading dataset paths...
✓ Training samples: 557
✓ Test samples: 134
✓ Species found: ['Ash', 'Beech', 'Douglas Fir', 'Oak', 'Pine', 'Red Oak', 'Spruce']


In [4]:
def process_files_for_features(file_paths, feature_dim=33):
    """Extract FPFH features from point cloud files"""
    features_list = []
    failed_files = 0
    
    # Use tqdm to create a nice progress bar
    for path in tqdm(file_paths, desc="Extracting FPFH Features"):
        try:
            pcd = load_point_cloud(path)
            if pcd and len(np.asarray(pcd.points)) > 0:
                # Extract FPFH features with appropriate voxel size
                features = extract_fpfh_features(pcd, voxel_size=0.2)
                if features is not None and len(features) == feature_dim:
                    features_list.append(features)
                else:
                    # Handle cases where features are None or wrong dimension
                    features_list.append(np.zeros(feature_dim))
                    failed_files += 1
            else:
                # Handle cases where a file fails to load
                features_list.append(np.zeros(feature_dim))
                failed_files += 1
        except Exception as e:
            print(f"Error processing {path.name}: {e}")
            features_list.append(np.zeros(feature_dim))
            failed_files += 1
    
    if failed_files > 0:
        print(f"Warning: {failed_files} files failed to process properly")
            
    return np.array(features_list)

# Process training and test data
print("Processing training data...")
X_train_features = process_files_for_features(X_train_paths)

print("Processing testing data...")
X_test_features = process_files_for_features(X_test_paths)

print("Feature extraction complete.")
print(f"Shape of training features: {X_train_features.shape}")
print(f"Shape of test features: {X_test_features.shape}")

# Check for any issues
if X_train_features.shape[0] == 0:
    print("❌ No training features extracted!")
if X_test_features.shape[0] == 0:
    print("❌ No test features extracted!")

Processing training data...


Extracting FPFH Features:   0%|          | 0/557 [00:00<?, ?it/s]



Extracting FPFH Features:   0%|          | 2/557 [00:00<01:43,  5.35it/s]



Extracting FPFH Features:   1%|          | 4/557 [00:00<01:23,  6.58it/s]



Extracting FPFH Features:   1%|          | 5/557 [00:00<01:22,  6.72it/s]



Extracting FPFH Features:   1%|▏         | 7/557 [00:01<02:00,  4.55it/s]



Extracting FPFH Features:   1%|▏         | 8/557 [00:01<01:58,  4.64it/s]



Extracting FPFH Features:   2%|▏         | 9/557 [00:01<02:13,  4.10it/s]



Extracting FPFH Features:   2%|▏         | 10/557 [00:02<02:20,  3.88it/s]



Extracting FPFH Features:   2%|▏         | 12/557 [00:02<01:58,  4.61it/s]



Extracting FPFH Features:   2%|▏         | 13/557 [00:02<02:01,  4.47it/s]



Extracting FPFH Features:   3%|▎         | 15/557 [00:03<01:50,  4.93it/s]



Extracting FPFH Features:   3%|▎         | 16/557 [00:03<01:41,  5.32it/s]



Extracting FPFH Features:   3%|▎         | 17/557 [00:03<02:18,  3.90it/s]



Extracting FPFH Features:   5%|▍         | 26/557 [00:04<00:54,  9.76it/s]



Extracting FPFH Features:   5%|▌         | 28/557 [00:05<01:04,  8.22it/s]



Extracting FPFH Features:   6%|▌         | 33/557 [00:05<00:57,  9.11it/s]



Extracting FPFH Features:   7%|▋         | 40/557 [00:06<00:37, 13.73it/s]



Extracting FPFH Features:   8%|▊         | 43/557 [00:06<00:50, 10.28it/s]



Extracting FPFH Features:   8%|▊         | 45/557 [00:06<00:50, 10.17it/s]



Extracting FPFH Features:   9%|▉         | 51/557 [00:07<00:40, 12.53it/s]



Extracting FPFH Features:  10%|█         | 56/557 [00:07<00:34, 14.37it/s]



Extracting FPFH Features:  11%|█         | 62/557 [00:07<00:26, 18.69it/s]



Extracting FPFH Features:  12%|█▏        | 65/557 [00:07<00:23, 20.55it/s]



Extracting FPFH Features:  13%|█▎        | 72/557 [00:08<00:33, 14.40it/s]



Extracting FPFH Features:  14%|█▍        | 79/557 [00:08<00:25, 18.55it/s]



Extracting FPFH Features:  15%|█▌        | 86/557 [00:09<00:34, 13.54it/s]



Extracting FPFH Features:  17%|█▋        | 92/557 [00:10<00:50,  9.28it/s]



Extracting FPFH Features:  17%|█▋        | 97/557 [00:10<00:46,  9.95it/s]



Extracting FPFH Features:  19%|█▊        | 104/557 [00:11<00:29, 15.35it/s]



Extracting FPFH Features:  19%|█▉        | 107/557 [00:11<00:40, 10.99it/s]



Extracting FPFH Features:  20%|██        | 114/557 [00:11<00:32, 13.44it/s]



Extracting FPFH Features:  21%|██        | 117/557 [00:12<00:28, 15.34it/s]



Extracting FPFH Features:  22%|██▏       | 120/557 [00:12<00:39, 11.19it/s]



Extracting FPFH Features:  23%|██▎       | 127/557 [00:12<00:25, 16.93it/s]



Extracting FPFH Features:  24%|██▎       | 131/557 [00:13<00:37, 11.49it/s]



Extracting FPFH Features:  24%|██▍       | 135/557 [00:13<00:32, 12.79it/s]



Extracting FPFH Features:  26%|██▌       | 143/557 [00:13<00:24, 16.95it/s]



Extracting FPFH Features:  26%|██▌       | 146/557 [00:14<00:22, 18.29it/s]



Extracting FPFH Features:  27%|██▋       | 149/557 [00:14<00:26, 15.48it/s]



Extracting FPFH Features:  27%|██▋       | 152/557 [00:14<00:27, 14.79it/s]



Extracting FPFH Features:  28%|██▊       | 155/557 [00:14<00:27, 14.82it/s]



Extracting FPFH Features:  29%|██▉       | 161/557 [00:15<00:31, 12.65it/s]



Extracting FPFH Features:  30%|███       | 169/557 [00:15<00:20, 19.27it/s]



Extracting FPFH Features:  31%|███       | 173/557 [00:15<00:16, 23.13it/s]



Extracting FPFH Features:  32%|███▏      | 180/557 [00:16<00:15, 23.66it/s]



Extracting FPFH Features:  34%|███▎      | 187/557 [00:16<00:15, 24.39it/s]



Extracting FPFH Features:  35%|███▌      | 195/557 [00:16<00:12, 28.27it/s]



Extracting FPFH Features:  37%|███▋      | 204/557 [00:16<00:11, 29.42it/s]



Extracting FPFH Features:  38%|███▊      | 211/557 [00:17<00:12, 27.19it/s]



Extracting FPFH Features:  39%|███▉      | 218/557 [00:17<00:11, 28.85it/s]



Extracting FPFH Features:  41%|████      | 227/557 [00:17<00:10, 30.55it/s]



Extracting FPFH Features:  41%|████▏     | 231/557 [00:17<00:11, 27.61it/s]



Extracting FPFH Features:  43%|████▎     | 240/557 [00:18<00:09, 32.99it/s]



Extracting FPFH Features:  44%|████▍     | 244/557 [00:18<00:09, 34.56it/s]



Extracting FPFH Features:  45%|████▌     | 251/557 [00:18<00:13, 22.57it/s]



Extracting FPFH Features:  46%|████▌     | 254/557 [00:18<00:12, 23.68it/s]



Extracting FPFH Features:  47%|████▋     | 261/557 [00:19<00:12, 24.06it/s]



Extracting FPFH Features:  48%|████▊     | 269/557 [00:19<00:11, 25.51it/s]



Extracting FPFH Features:  49%|████▉     | 272/557 [00:19<00:11, 24.12it/s]



Extracting FPFH Features:  50%|█████     | 280/557 [00:19<00:09, 28.35it/s]



Extracting FPFH Features:  51%|█████     | 283/557 [00:19<00:09, 28.07it/s]



Extracting FPFH Features:  52%|█████▏    | 289/557 [00:20<00:12, 21.49it/s]



Extracting FPFH Features:  52%|█████▏    | 292/557 [00:20<00:13, 20.01it/s]



Extracting FPFH Features:  53%|█████▎    | 297/557 [00:20<00:16, 15.84it/s]



Extracting FPFH Features:  54%|█████▍    | 303/557 [00:21<00:19, 13.13it/s]



Extracting FPFH Features:  55%|█████▍    | 305/557 [00:21<00:20, 12.43it/s]



Extracting FPFH Features:  56%|█████▌    | 311/557 [00:21<00:17, 13.72it/s]



Extracting FPFH Features:  57%|█████▋    | 319/557 [00:22<00:09, 24.75it/s]



Extracting FPFH Features:  58%|█████▊    | 325/557 [00:22<00:10, 22.08it/s]



Extracting FPFH Features:  62%|██████▏   | 345/557 [00:22<00:05, 36.76it/s]



Extracting FPFH Features:  63%|██████▎   | 349/557 [00:23<00:07, 26.04it/s]



Extracting FPFH Features:  63%|██████▎   | 353/557 [00:23<00:09, 21.49it/s]



Extracting FPFH Features:  64%|██████▍   | 356/557 [00:23<00:10, 19.69it/s]



Extracting FPFH Features:  64%|██████▍   | 359/557 [00:23<00:11, 17.64it/s]



Extracting FPFH Features:  65%|██████▍   | 361/557 [00:24<00:11, 16.54it/s]



Extracting FPFH Features:  66%|██████▌   | 365/557 [00:24<00:11, 16.52it/s]



Extracting FPFH Features:  66%|██████▌   | 369/557 [00:24<00:12, 14.54it/s]



Extracting FPFH Features:  67%|██████▋   | 371/557 [00:24<00:12, 14.38it/s]



Extracting FPFH Features:  68%|██████▊   | 376/557 [00:25<00:12, 14.57it/s]



Extracting FPFH Features:  68%|██████▊   | 380/557 [00:25<00:11, 16.01it/s]



Extracting FPFH Features:  69%|██████▊   | 382/557 [00:25<00:12, 14.38it/s]



Extracting FPFH Features:  69%|██████▉   | 387/557 [00:25<00:10, 15.85it/s]



Extracting FPFH Features:  70%|███████   | 391/557 [00:26<00:10, 15.20it/s]



Extracting FPFH Features:  71%|███████   | 393/557 [00:26<00:10, 16.03it/s]



Extracting FPFH Features:  71%|███████▏  | 397/557 [00:26<00:11, 14.42it/s]



Extracting FPFH Features:  72%|███████▏  | 399/557 [00:26<00:11, 13.43it/s]



Extracting FPFH Features:  72%|███████▏  | 403/557 [00:26<00:10, 14.74it/s]



Extracting FPFH Features:  73%|███████▎  | 407/557 [00:27<00:10, 14.66it/s]



Extracting FPFH Features:  74%|███████▍  | 411/557 [00:27<00:09, 15.83it/s]



Extracting FPFH Features:  74%|███████▍  | 413/557 [00:27<00:09, 15.41it/s]



Extracting FPFH Features:  75%|███████▍  | 417/557 [00:27<00:10, 13.81it/s]



Extracting FPFH Features:  75%|███████▌  | 420/557 [00:28<00:08, 15.29it/s]



Extracting FPFH Features:  76%|███████▌  | 424/557 [00:28<00:08, 15.07it/s]



Extracting FPFH Features:  78%|███████▊  | 432/557 [00:28<00:05, 21.66it/s]



Extracting FPFH Features:  79%|███████▊  | 438/557 [00:28<00:05, 20.13it/s]



Extracting FPFH Features:  81%|████████  | 449/557 [00:29<00:06, 17.90it/s]



Extracting FPFH Features:  81%|████████▏ | 453/557 [00:29<00:04, 21.69it/s]



Extracting FPFH Features:  83%|████████▎ | 463/557 [00:30<00:04, 20.00it/s]



Extracting FPFH Features:  84%|████████▍ | 469/557 [00:30<00:05, 16.85it/s]



Extracting FPFH Features:  85%|████████▍ | 471/557 [00:30<00:05, 14.56it/s]



Extracting FPFH Features:  87%|████████▋ | 483/557 [00:31<00:03, 21.03it/s]



Extracting FPFH Features:  88%|████████▊ | 490/557 [00:32<00:05, 11.45it/s]



Extracting FPFH Features:  89%|████████▉ | 498/557 [00:32<00:04, 14.06it/s]



Extracting FPFH Features:  91%|█████████▏| 509/557 [00:33<00:02, 18.09it/s]



Extracting FPFH Features:  94%|█████████▎| 521/557 [00:33<00:01, 21.72it/s]



Extracting FPFH Features:  99%|█████████▊| 549/557 [00:36<00:00, 13.75it/s]



Extracting FPFH Features: 100%|██████████| 557/557 [00:36<00:00, 15.28it/s]


Processing testing data...


Extracting FPFH Features:   0%|          | 0/134 [00:00<?, ?it/s]



Extracting FPFH Features:   1%|          | 1/134 [00:00<00:43,  3.03it/s]



Extracting FPFH Features:   2%|▏         | 3/134 [00:00<00:26,  4.96it/s]



Extracting FPFH Features:   6%|▌         | 8/134 [00:00<00:09, 13.02it/s]



Extracting FPFH Features:   9%|▉         | 12/134 [00:01<00:06, 17.56it/s]



Extracting FPFH Features:  11%|█         | 15/134 [00:01<00:10, 11.25it/s]



Extracting FPFH Features:  17%|█▋        | 23/134 [00:02<00:12,  8.86it/s]



Extracting FPFH Features:  19%|█▊        | 25/134 [00:02<00:13,  8.05it/s]



Extracting FPFH Features:  24%|██▍       | 32/134 [00:03<00:08, 12.45it/s]



Extracting FPFH Features:  29%|██▉       | 39/134 [00:03<00:05, 16.12it/s]



Extracting FPFH Features:  34%|███▍      | 46/134 [00:04<00:05, 17.43it/s]



Extracting FPFH Features:  39%|███▉      | 52/134 [00:04<00:04, 19.15it/s]



Extracting FPFH Features:  44%|████▍     | 59/134 [00:04<00:03, 19.27it/s]



Extracting FPFH Features:  47%|████▋     | 63/134 [00:04<00:03, 22.65it/s]



Extracting FPFH Features:  49%|████▉     | 66/134 [00:05<00:03, 17.18it/s]



Extracting FPFH Features:  54%|█████▎    | 72/134 [00:05<00:03, 18.05it/s]



Extracting FPFH Features:  60%|█████▉    | 80/134 [00:05<00:02, 23.69it/s]



Extracting FPFH Features:  63%|██████▎   | 84/134 [00:05<00:02, 24.96it/s]



Extracting FPFH Features:  65%|██████▍   | 87/134 [00:06<00:02, 21.96it/s]



Extracting FPFH Features:  69%|██████▉   | 93/134 [00:06<00:02, 18.47it/s]



Extracting FPFH Features:  72%|███████▏  | 97/134 [00:06<00:02, 17.33it/s]



Extracting FPFH Features:  74%|███████▍  | 99/134 [00:06<00:02, 15.22it/s]



Extracting FPFH Features:  77%|███████▋  | 103/134 [00:07<00:02, 12.38it/s]



Extracting FPFH Features:  78%|███████▊  | 105/134 [00:07<00:02, 13.82it/s]



Extracting FPFH Features:  85%|████████▌ | 114/134 [00:08<00:01, 12.03it/s]



Extracting FPFH Features:  87%|████████▋ | 116/134 [00:08<00:01, 11.41it/s]



Extracting FPFH Features:  90%|█████████ | 121/134 [00:08<00:01, 12.75it/s]



Extracting FPFH Features: 100%|██████████| 134/134 [00:09<00:00, 13.94it/s]

Feature extraction complete.
Shape of training features: (557, 33)
Shape of test features: (134, 33)





In [5]:
# Crucial step for SVMs: feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_features)
X_test_scaled = scaler.transform(X_test_features) # Use the same scaler

# Create and train the RBF SVM
print("Training the RBF SVM classifier...")

svm_classifier = SVC(kernel='rbf', C=10, gamma='scale', class_weight='balanced', random_state=42)
svm_classifier.fit(X_train_scaled, y_train)
print("Training complete.")

Training the RBF SVM classifier...
Training complete.


In [6]:
# Hyperparameter Optimization with Grid Search
print("Performing hyperparameter optimization with Grid Search...")
print("=" * 60)

from sklearn.model_selection import StratifiedKFold

# Define parameter grid (tune class_weight as well)
param_grid = {
    'C': [0.1, 1, 3, 10, 30, 100],
    'gamma': ['scale', 'auto', 0.03, 0.1, 0.3, 1],
    'class_weight': [None, 'balanced'],
    'kernel': ['rbf']
}

# Use stratified CV and balanced accuracy to reward minority-class performance
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(
    SVC(random_state=42),
    param_grid,
    cv=cv,
    verbose=2,
    n_jobs=-1,
    scoring='balanced_accuracy'
)

# Fit the grid search
start_time = time.time()
print(f"Testing {len(param_grid['C']) * len(param_grid['gamma']) * len(param_grid['class_weight'])} parameter combinations...")
grid_search.fit(X_train_scaled, y_train)
end_time = time.time()

# Results
print(f"\nGrid Search completed in {end_time - start_time:.2f} seconds")
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation balanced accuracy: {grid_search.best_score_:.4f}")

# Get the best estimator
best_svm = grid_search.best_estimator_
print(f"Best SVM model: {best_svm}")

# Show top 5 parameter combinations
print(f"\nTop 5 parameter combinations (by balanced accuracy):")
print("-" * 50)
results_df = pd.DataFrame(grid_search.cv_results_)
top_5 = results_df.nlargest(5, 'mean_test_score')[['params', 'mean_test_score', 'std_test_score']]
for idx, row in top_5.iterrows():
    params = row['params']
    mean_score = row['mean_test_score']
    std_score = row['std_test_score']
    print(f"C={params['C']}, gamma={params['gamma']}, class_weight={params.get('class_weight')}: {mean_score:.4f} (±{std_score:.4f})")

print(f"\n{'='*60}")
print("Hyperparameter optimization completed!")

Performing hyperparameter optimization with Grid Search...
Testing 72 parameter combinations...
Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] END ...C=0.1, class_weight=None, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ..C=0.1, class_weight=None, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ...C=0.1, class_weight=None, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ..C=0.1, class_weight=None, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ..C=0.1, class_weight=None, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ..C=0.1, class_weight=None, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ..C=0.1, class_weight=None, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ...C=0.1, class_weight=None, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ...C=0.1, class_weight=None, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ...C=0.1, class_weight=None, gamma=0.03, kernel=rbf; total time=   0.0s
[CV] END ...C=0.1, class_weight=

In [7]:
print("Evaluating the optimized model...")
y_pred_optimized = best_svm.predict(X_test_scaled)

# Also evaluate the default model for comparison
y_pred_default = svm_classifier.predict(X_test_scaled)

# Get the unique class names from both train and test sets
train_classes = set(y_train)
test_classes = set(y_test)
all_classes = sorted(list(train_classes | test_classes))

print("\n" + "="*70)
print("          FPFH + RBF SVM Classification Results (OPTIMIZED)")
print("="*70)

# Compare accuracies
accuracy_optimized = accuracy_score(y_test, y_pred_optimized)
accuracy_default = accuracy_score(y_test, y_pred_default)

print(f"Optimized Model Accuracy: {accuracy_optimized:.4f} ({accuracy_optimized*100:.2f}%)")
print(f"Default Model Accuracy:   {accuracy_default:.4f} ({accuracy_default*100:.2f}%)")
print(f"Improvement: {(accuracy_optimized - accuracy_default)*100:+.2f} percentage points")

# Detailed classification report for optimized model
print(f"\nNumber of test samples: {len(y_test)}")
print(f"Number of species: {len(all_classes)}")
print(f"Species: {', '.join(all_classes)}")

print(f"\nDetailed Classification Report (Optimized Model):")
print("-" * 70)
report_optimized = classification_report(y_test, y_pred_optimized, target_names=all_classes, zero_division=0)
print(report_optimized)

# Additional metrics
from sklearn.metrics import confusion_matrix
import pandas as pd

print("\nConfusion Matrix (Optimized Model):")
print("-" * 40)
cm_optimized = confusion_matrix(y_test, y_pred_optimized, labels=all_classes)
cm_df_optimized = pd.DataFrame(cm_optimized, index=all_classes, columns=all_classes)
print(cm_df_optimized)

# Per-class accuracy breakdown for optimized model
print(f"\nPer-class Performance (Optimized Model):")
print("-" * 50)
for i, species in enumerate(all_classes):
    if species in set(y_test):
        # Get indices for this species in test set
        species_indices = [j for j, label in enumerate(y_test) if label == species]
        species_predictions_opt = [y_pred_optimized[j] for j in species_indices]
        species_predictions_def = [y_pred_default[j] for j in species_indices]
        
        species_accuracy_opt = sum(1 for pred in species_predictions_opt if pred == species) / len(species_predictions_opt)
        species_accuracy_def = sum(1 for pred in species_predictions_def if pred == species) / len(species_predictions_def)
        improvement = species_accuracy_opt - species_accuracy_def
        
        print(f"{species:<15}: {species_accuracy_opt:.4f} (vs {species_accuracy_def:.4f}, {improvement:+.4f}) | {len(species_indices):3d} samples")

print(f"\nBest Model Parameters:")
print(f"C = {best_svm.C}")
print(f"gamma = {best_svm.gamma}")
print(f"kernel = {best_svm.kernel}")

print(f"\n{'='*70}")
print("Optimized pipeline completed successfully!")

Evaluating the optimized model...

          FPFH + RBF SVM Classification Results (OPTIMIZED)
Optimized Model Accuracy: 0.7910 (79.10%)
Default Model Accuracy:   0.6866 (68.66%)
Improvement: +10.45 percentage points

Number of test samples: 134
Number of species: 7
Species: Ash, Beech, Douglas Fir, Oak, Pine, Red Oak, Spruce

Detailed Classification Report (Optimized Model):
----------------------------------------------------------------------
              precision    recall  f1-score   support

         Ash       0.25      0.29      0.27         7
       Beech       0.87      0.84      0.86        32
 Douglas Fir       0.82      0.86      0.84        36
         Oak       0.67      0.50      0.57         4
        Pine       0.67      0.80      0.73         5
     Red Oak       0.68      0.79      0.73        19
      Spruce       0.96      0.81      0.88        31

    accuracy                           0.79       134
   macro avg       0.70      0.70      0.70       134
weighted