In [1]:
#!/usr/bin/env python3
"""
Wine Classification - OOB Error, Confusion Matrix, Classification Report
Tests all 4 configurations: GPU/CPU Ã— Casewise/Non-casewise
Uses built-in load_wine, confusion_matrix, and classification_report
"""

import sys
import os

# Get the directory where this notebook is located
notebook_dir = os.path.dirname(os.path.abspath('__file__'))
# Or use absolute path directly:
sys.path.insert(0, '/home/bigboidad/bcrfkit-arxiv/github-release/python')

import numpy as np
import RFX as rf
import time

print(f"RFX loaded from: {rf.__file__}")

RFX loaded from: /home/bigboidad/bcrfkit-arxiv/github-release/python/RFX.cpython-313-x86_64-linux-gnu.so


In [2]:
# Feature names for Wine dataset
FEATURE_NAMES = [
    'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
    'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
    'Proanthocyanins', 'Color intensity', 'Hue',
    'OD280/OD315 of diluted wines', 'Proline'
]

CLASS_NAMES = ['Class 0', 'Class 1', 'Class 2']

def print_confusion_matrix(cm, n_classes):
    """Pretty print confusion matrix"""
    # Header
    header = "          " + "  ".join(f"Pred {i}" for i in range(n_classes))
    print(header)
    print("-" * len(header))
    
    # Rows
    for i in range(n_classes):
        row = f"True {i}  |"
        for j in range(n_classes):
            row += f"   {cm[i, j]:3d}  "
        print(row)
    print()

 

In [3]:
# Load Wine dataset (built-in)
X, y = rf.load_wine()
n_samples, n_features = X.shape
n_classes = len(np.unique(y))

print(f"\nDataset: Wine (UCI ML - built-in)")
print(f"   Samples: {n_samples}")
print(f"   Features: {n_features}")
print(f"   Classes: {n_classes}")
print(f"   Class distribution: {np.bincount(y).tolist()}")

# Run all 4 configurations
ntree = 100
results = {}



Dataset: Wine (UCI ML - built-in)
   Samples: 178
   Features: 13
   Classes: 3
   Class distribution: [59, 71, 48]


In [4]:
#1. GPU casewise

In [5]:
#GPU casewise
use_gpu = True
mode='gpu' if use_gpu else False
use_casewise=True
weighting = 'case-wise' if use_casewise else 'non-case-wise'
run_type='gpu_cw'
# Create model
model = rf.RandomForestClassifier(
    ntree=ntree,
    mtry=4,  # sqrt(13) â‰ˆ 3.6
    nsample=X.shape[0],
    nclass=n_classes,
    use_gpu=use_gpu,
    batch_size=25,  # Auto SM-aware batching with batch size 0
    iseed=42,
    compute_proximity=False,
    compute_importance=True,
    compute_local_importance=False,
    use_casewise=use_casewise
)

# Train
print(f"\nTraining {ntree} trees...")
start_time = time.time()
model.fit(X, y)
elapsed = time.time() - start_time


# Get results
oob_error = model.get_oob_error()
oob_preds = model.get_oob_predictions()

print(f"Training time: {elapsed:.2f}s ({ntree/elapsed:.1f} trees/sec)")


# OOB Error
print(f"\nðŸ“Š OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Confusion Matrix (using RFX built-in)
cm = rf.confusion_matrix(y.astype(np.int32), oob_preds.astype(np.int32))
print(f"\nðŸ“Š Confusion Matrix (rf.confusion_matrix):")

print_confusion_matrix(cm, n_classes)

# Classification Report (using RFX built-in - now pure Python, Jupyter-safe)
print(f"ðŸ“Š Classification Report (rf.classification_report):")
print(rf.classification_report(y.astype(np.int32), oob_preds.astype(np.int32)))

results[run_type] = {
    'mode': f"{mode} {weighting}",
    'oob_error': oob_error,
    'confusion_matrix': cm,
    'time': elapsed,
    'oob_preds': oob_preds
}



Training 100 trees...
Training Random Forest Classifier with 100 trees...


Training Random Forest:   0%|                                                                                                              | 0/100 tree [00:00<?, ?tree/s]

                                                                                                                                                                          


ðŸš€ GPU MEMORY STATUS (After Training):
ðŸ“Š GPU Memory:
   Total: 12.0 GB
   Available: 0.0 GB
   Used: 12.0 GB
   Usage: 100.0%
Training time: 23.85s (4.2 trees/sec)

ðŸ“Š OOB Error: 0.028090 (2.81%)
   OOB Accuracy: 97.19%

ðŸ“Š Confusion Matrix (rf.confusion_matrix):
          Pred 0  Pred 1  Pred 2
--------------------------------
True 0  |    59       0       0  
True 1  |     3      67       1  
True 2  |     0       1      47  

ðŸ“Š Classification Report (rf.classification_report):

Classification Report:
     Class    Precision       Recall     F1-Score      Support
----------------------------------------------------------
         0       0.9516       1.0000       0.9752           59
         1       0.9853       0.9437       0.9640           71
         2       0.9792       0.9792       0.9792           48





In [6]:
#GPU noncasewise
use_gpu = True
mode='gpu' if use_gpu else False
use_casewise=False
weighting = 'case-wise' if use_casewise else 'non-case-wise'
run_type='gpu_ncw'
# Create model
model = rf.RandomForestClassifier(
    ntree=ntree,
    mtry=4,  # sqrt(13) â‰ˆ 3.6
    nsample=X.shape[0],
    nclass=n_classes,
    use_gpu=use_gpu,
    batch_size=25,  # Auto SM-aware batching with batch size 0
    iseed=42,
    compute_proximity=False,
    compute_importance=True,
    compute_local_importance=False,
    use_casewise=use_casewise
)

# Train
print(f"\nTraining {ntree} trees...")
start_time = time.time()
model.fit(X, y)
elapsed = time.time() - start_time


# Get results
oob_error = model.get_oob_error()
oob_preds = model.get_oob_predictions()

print(f"Training time: {elapsed:.2f}s ({ntree/elapsed:.1f} trees/sec)")


# OOB Error
print(f"\nðŸ“Š OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Confusion Matrix (using RFX built-in)
cm = rf.confusion_matrix(y.astype(np.int32), oob_preds.astype(np.int32))
print(f"\nðŸ“Š Confusion Matrix (rf.confusion_matrix):")

print_confusion_matrix(cm, n_classes)

# Classification Report (using RFX built-in - now pure Python, Jupyter-safe)
print(f"ðŸ“Š Classification Report (rf.classification_report):")
print(rf.classification_report(y.astype(np.int32), oob_preds.astype(np.int32)))

results[run_type] = {
    'mode': f"{mode} {weighting}",
    'oob_error': oob_error,
    'confusion_matrix': cm,
    'time': elapsed,
    'oob_preds': oob_preds
}



Training 100 trees...
Training Random Forest Classifier with 100 trees...


                                                                                                                                                                          


ðŸš€ GPU MEMORY STATUS (After Training):
ðŸ“Š GPU Memory:
   Total: 12.0 GB
   Available: 0.0 GB
   Used: 12.0 GB
   Usage: 100.0%
Training time: 22.97s (4.4 trees/sec)

ðŸ“Š OOB Error: 0.028090 (2.81%)
   OOB Accuracy: 97.19%

ðŸ“Š Confusion Matrix (rf.confusion_matrix):
          Pred 0  Pred 1  Pred 2
--------------------------------
True 0  |    59       0       0  
True 1  |     2      67       2  
True 2  |     0       1      47  

ðŸ“Š Classification Report (rf.classification_report):

Classification Report:
     Class    Precision       Recall     F1-Score      Support
----------------------------------------------------------
         0       0.9672       1.0000       0.9833           59
         1       0.9853       0.9437       0.9640           71
         2       0.9592       0.9792       0.9691           48





In [7]:
#CPU noncasewise
use_gpu = False
mode='gpu' if use_gpu else False
use_casewise=False
weighting = 'case-wise' if use_casewise else 'non-case-wise'
run_type='cpu_ncw'
# Create model
model = rf.RandomForestClassifier(
    ntree=ntree,
    mtry=4,  # sqrt(13) â‰ˆ 3.6
    nsample=X.shape[0],
    nclass=n_classes,
    use_gpu=use_gpu,
    batch_size=25,  # Auto SM-aware batching with batch size 0
    iseed=42,
    compute_proximity=False,
    compute_importance=True,
    compute_local_importance=False,
    use_casewise=use_casewise
)

# Train
print(f"\nTraining {ntree} trees...")
start_time = time.time()
model.fit(X, y)
elapsed = time.time() - start_time


# Get results
oob_error = model.get_oob_error()
oob_preds = model.get_oob_predictions()

print(f"Training time: {elapsed:.2f}s ({ntree/elapsed:.1f} trees/sec)")


# OOB Error
print(f"\nðŸ“Š OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Confusion Matrix (using RFX built-in)
cm = rf.confusion_matrix(y.astype(np.int32), oob_preds.astype(np.int32))
print(f"\nðŸ“Š Confusion Matrix (rf.confusion_matrix):")

print_confusion_matrix(cm, n_classes)

# Classification Report (using RFX built-in - now pure Python, Jupyter-safe)
print(f"ðŸ“Š Classification Report (rf.classification_report):")
print(rf.classification_report(y.astype(np.int32), oob_preds.astype(np.int32)))

results[run_type] = {
    'mode': f"{mode} {weighting}",
    'oob_error': oob_error,
    'confusion_matrix': cm,
    'time': elapsed,
    'oob_preds': oob_preds
}



Training 100 trees...
Training Random Forest Classifier with 100 trees...

ðŸ’» CPU MEMORY INFORMATION
ðŸ“Š System Memory:
   Total: 31.2 GB
   Available: 19.7 GB
   Used: 9.0 GB
   Usage: 36.8%



Training Random Forest:   0%|                                                                                                              | 0/100 tree [00:00<?, ?tree/s]

                                                                                                                                                                          

Training time: 0.44s (229.5 trees/sec)

ðŸ“Š OOB Error: 0.022472 (2.25%)
   OOB Accuracy: 97.75%

ðŸ“Š Confusion Matrix (rf.confusion_matrix):
          Pred 0  Pred 1  Pred 2
--------------------------------
True 0  |    59       0       0  
True 1  |     1      69       1  
True 2  |     0       2      46  

ðŸ“Š Classification Report (rf.classification_report):

Classification Report:
     Class    Precision       Recall     F1-Score      Support
----------------------------------------------------------
         0       0.9833       1.0000       0.9916           59
         1       0.9718       0.9718       0.9718           71
         2       0.9787       0.9583       0.9684           48





In [8]:
#CPU casewise
use_gpu = False
mode='gpu' if use_gpu else False
use_casewise=True
weighting = 'case-wise' if use_casewise else 'non-case-wise'
run_type='cpu_cw'
# Create model
model = rf.RandomForestClassifier(
    ntree=ntree,
    mtry=4,  # sqrt(13) â‰ˆ 3.6
    nsample=X.shape[0],
    nclass=n_classes,
    use_gpu=use_gpu,
    batch_size=25,  # Auto SM-aware batching with batch size 0
    iseed=42,
    compute_proximity=False,
    compute_importance=True,
    compute_local_importance=False,
    use_casewise=use_casewise
)

# Train
print(f"\nTraining {ntree} trees...")
start_time = time.time()
model.fit(X, y)
elapsed = time.time() - start_time


# Get results
oob_error = model.get_oob_error()
oob_preds = model.get_oob_predictions()

print(f"Training time: {elapsed:.2f}s ({ntree/elapsed:.1f} trees/sec)")


# OOB Error
print(f"\nðŸ“Š OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Confusion Matrix (using RFX built-in)
cm = rf.confusion_matrix(y.astype(np.int32), oob_preds.astype(np.int32))
print(f"\nðŸ“Š Confusion Matrix (rf.confusion_matrix):")

print_confusion_matrix(cm, n_classes)

# Classification Report (using RFX built-in - now pure Python, Jupyter-safe)
print(f"ðŸ“Š Classification Report (rf.classification_report):")
print(rf.classification_report(y.astype(np.int32), oob_preds.astype(np.int32)))

results[run_type] = {
    'mode': f"{mode} {weighting}",
    'oob_error': oob_error,
    'confusion_matrix': cm,
    'time': elapsed,
    'oob_preds': oob_preds
}



Training 100 trees...
Training Random Forest Classifier with 100 trees...

ðŸ’» CPU MEMORY INFORMATION
ðŸ“Š System Memory:
   Total: 31.2 GB
   Available: 19.7 GB
   Used: 9.0 GB
   Usage: 36.9%



                                                                                                                                                                          

Training time: 0.66s (151.5 trees/sec)

ðŸ“Š OOB Error: 0.028090 (2.81%)
   OOB Accuracy: 97.19%

ðŸ“Š Confusion Matrix (rf.confusion_matrix):
          Pred 0  Pred 1  Pred 2
--------------------------------
True 0  |    59       0       0  
True 1  |     1      68       2  
True 2  |     0       2      46  

ðŸ“Š Classification Report (rf.classification_report):

Classification Report:
     Class    Precision       Recall     F1-Score      Support
----------------------------------------------------------
         0       0.9833       1.0000       0.9916           59
         1       0.9714       0.9577       0.9645           71
         2       0.9583       0.9583       0.9583           48





In [9]:
print("=" * 70)
print("  WINE CLASSIFICATION - OOB ERROR, CONFUSION MATRIX, CLASSIFICATION REPORT")
print("  Testing: GPU/CPU Ã— Casewise/Non-casewise")
print("=" * 70)



# Summary comparison
print("\n" + "=" * 70)
print("  SUMMARY COMPARISON")
print("=" * 70)

print("\nðŸ“Š OOB Errors:")
print(f"   {'Configuration':<25s} {'OOB Error':>12s} {'Accuracy':>12s} {'Time':>10s}")
print("   " + "-" * 60)
for key, res in results.items():
    print(f"   {res['mode']:<25s} {res['oob_error']:>12.6f} {(1-res['oob_error'])*100:>11.2f}% {res['time']:>9.2f}s")

# Note: Detailed F1 scores are printed in the classification report above
print("\nðŸ“Š Quick Summary:")

print("\nðŸ“Š Prediction Agreement (vs GPU Non-casewise):")
baseline = results['gpu_ncw']['oob_preds']
for key, res in results.items():
    if key != 'gpu_ncw':
        agreement = np.sum(res['oob_preds'] == baseline) / len(baseline)
        print(f"   {res['mode']:<25s} {agreement*100:>6.2f}% agreement")

print("\nðŸ“Š Casewise vs Non-casewise Differences:")
gpu_diff = abs(results['gpu_cw']['oob_error'] - results['gpu_ncw']['oob_error'])
cpu_diff = abs(results['cpu_cw']['oob_error'] - results['cpu_ncw']['oob_error'])
print(f"   GPU:  {gpu_diff:.6f} ({gpu_diff*100:.2f}% difference)")
print(f"   CPU:  {cpu_diff:.6f} ({cpu_diff*100:.2f}% difference)")

if gpu_diff < 0.001 and cpu_diff < 0.001:
    print("\n    WARNING: Casewise and non-casewise produce IDENTICAL results!")
else:
    print("\n   Casewise and non-casewise produce DIFFERENT results (expected!)")

print("\n" + "=" * 70)
print("  TEST COMPLETE")
print("=" * 70)

  WINE CLASSIFICATION - OOB ERROR, CONFUSION MATRIX, CLASSIFICATION REPORT
  Testing: GPU/CPU Ã— Casewise/Non-casewise

  SUMMARY COMPARISON

ðŸ“Š OOB Errors:
   Configuration                OOB Error     Accuracy       Time
   ------------------------------------------------------------
   gpu case-wise                 0.028090       97.19%     23.85s
   gpu non-case-wise             0.028090       97.19%     22.97s
   False non-case-wise           0.022472       97.75%      0.44s
   False case-wise               0.028090       97.19%      0.66s

ðŸ“Š Quick Summary:

ðŸ“Š Prediction Agreement (vs GPU Non-casewise):
   gpu case-wise              98.88% agreement
   False non-case-wise        98.31% agreement
   False case-wise            97.75% agreement

ðŸ“Š Casewise vs Non-casewise Differences:
   GPU:  0.000000 (0.00% difference)
   CPU:  0.005618 (0.56% difference)

   Casewise and non-casewise produce DIFFERENT results (expected!)

  TEST COMPLETE
