In [None]:
#!/usr/bin/env python3
"""
Wine Classification - Proximity Matrix + RFViz Visualization
Tests all 4 configurations: GPU/CPU × Casewise/Non-casewise
GPU: Low-rank NF4 quantized proximity
CPU: Full dense proximity matrix
RFViz: Inline HTML interactive visualization
"""

import sys
import os
import numpy as np
import rfx as rf
import time
from IPython.display import HTML, display

In [2]:
# Display names for the 13 Wine features; handed to rf.rfviz as `feature_names`.
FEATURE_NAMES = [
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

# One display label per class index (0, 1, 2).
CLASS_NAMES = [f'Class {label}' for label in range(3)]

def print_confusion_matrix(cm, n_classes):
    """Print a confusion matrix as a small text table.

    Args:
        cm: 2-D matrix indexed as ``cm[true_class, predicted_class]``; entries
            must be formattable with the integer ``d`` format code.
        n_classes: Number of classes, i.e. how many rows and columns to print.
    """
    class_ids = range(n_classes)

    # Column titles, followed by a rule of the same width
    title_row = " " * 10 + "  ".join(f"Pred {col}" for col in class_ids)
    print(title_row)
    print("-" * len(title_row))

    # One printed line per true class
    for true_idx in class_ids:
        cells = "".join(f"   {cm[true_idx, pred_idx]:3d}  " for pred_idx in class_ids)
        print(f"True {true_idx}  |" + cells)
    print()

 

In [None]:
# Fetch the Wine data bundled with rfx and derive its basic dimensions.
X, y = rf.load_wine()
n_samples, n_features = X.shape
n_classes = np.unique(y).size
class_counts = np.bincount(y).tolist()

# Short dataset report
print("\nDataset: Wine (UCI ML - built-in)")
for label, value in (
    ("Samples", n_samples),
    ("Features", n_features),
    ("Classes", n_classes),
    ("Class distribution", class_counts),
):
    print(f"   {label}: {value}")

# Settings shared by the four runs below: forest size, and the dict in which
# every run stores its metrics under its own run_type key.
ntree = 1000
results = {}


In [10]:
#1. GPU casewise

In [None]:
# GPU run with case-wise weighting and low-rank NF4-quantized proximity.
# Trains the forest, reports OOB metrics and low-rank factor sizes, and stores
# everything in results['gpu_cw'] for the summary cell.
use_gpu = True
mode='gpu' if use_gpu else 'cpu'
use_casewise=True
weighting = 'case-wise' if use_casewise else 'non-case-wise'
run_type='gpu_cw'

print(f"\n{'='*70}")
print(f"  {mode.upper()} {weighting.upper()} - LOW-RANK PROXIMITY")
print(f"{'='*70}")

# Create model with low-rank NF4 proximity
model_gpu_cw = rf.RandomForestClassifier(
    ntree=ntree,
    mtry=4,
    nsample=X.shape[0],
    nclass=n_classes,
    use_gpu=use_gpu,
    batch_size=0,                     # 0 -> batch size chosen by the library (the run log reports "(auto)")
    iseed=42,
    compute_proximity=True,           # Enable proximity
    compute_importance=True,
    compute_local_importance=True,
    use_casewise=use_casewise,
    use_qlora=True,                   # Low-rank factorization
    # Fixed: this was 'int8', contradicting the NF4 description used throughout
    # the notebook and the sibling GPU non-case-wise run (quant_mode='nf4').
    quant_mode='nf4',                 # NF4 quantization
    rank=32                     # Low-rank factor rank
)

# Train (wall-clock timed)
print(f"\nTraining {ntree} trees with low-rank proximity...")
start_time = time.time()
model_gpu_cw.fit(X, y)
elapsed = time.time() - start_time

# Get results
oob_error = model_gpu_cw.get_oob_error()
oob_preds = model_gpu_cw.get_oob_predictions()

print(f"Training time: {elapsed:.2f}s ({ntree/elapsed:.1f} trees/sec)")
print(f"\n OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Low-rank factors info
A, B, actual_rank = model_gpu_cw.get_lowrank_factors()
print(f"\n Low-Rank Factors: A={A.shape}, B={B.shape}, rank={actual_rank}")
# Size estimate at 4 bytes per entry: a full n x n matrix versus two n x rank factors.
# NOTE(review): the quantized factors may occupy less than 4 bytes per entry - confirm.
full_mem = n_samples**2 * 4 / (1024**2)
lr_mem = 2 * n_samples * actual_rank * 4 / (1024**2)
print(f"   Memory: {lr_mem:.4f} MB (vs {full_mem:.4f} MB full) = {full_mem/lr_mem:.1f}x compression")

# Confusion Matrix (true labels vs OOB predictions, both cast to int32)
cm = rf.confusion_matrix(y.astype(np.int32), oob_preds.astype(np.int32))
print(f"\n Confusion Matrix:")
print_confusion_matrix(cm, n_classes)

# Keep this run's metrics and model for the summary and RFViz cells
results[run_type] = {
    'mode': f"{mode} {weighting}",
    'oob_error': oob_error,
    'confusion_matrix': cm,
    'time': elapsed,
    'oob_preds': oob_preds,
    'model': model_gpu_cw
}



  GPU CASE-WISE - LOW-RANK PROXIMITY

Training 1000 trees with low-rank proximity...
Training Random Forest Classifier with 1000 trees...
GPU batch_size=100 (auto)

🖥️  GPU MEMORY INFORMATION
📊 GPU Memory:
   Total: 12287 MB
   Available: 0 MB
   Used: 12287 MB



Training Random Forest:   0%|                                                                                                      | 0/1000 tree [00:00<?, ?tree/s]

🔧 Auto-scaling: Selected batch size = 100 trees (out of 1000 total)


Training Random Forest:  10%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè                                                                                  | 100/1000 tree [02:18<20:44,  1.38s/tree]

In [5]:
# Low-rank factor A, as returned by get_lowrank_factors() in a GPU training cell.
# NOTE(review): A is reassigned by every GPU run, so this prints the factors of
# whichever GPU training cell executed most recently.
print(A)

[[0.21859811 0.17212451 0.07917727 ... 0.14458458 0.02926116 0.0499161 ]
 [0.05163735 0.         0.01721245 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [6]:
# Low-rank factor B, as returned by get_lowrank_factors() in a GPU training cell.
# NOTE(review): B is reassigned by every GPU run, so this prints the factors of
# whichever GPU training cell executed most recently.
print(B)

[[0.21859811 0.17212451 0.07917727 ... 0.14458458 0.02926116 0.0499161 ]
 [0.05163735 0.         0.01721245 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


In [None]:
import plotly.graph_objects as go
import numpy as np

# `mds_3d` was never defined anywhere in this notebook, so this cell raised
# NameError on a fresh kernel. It is now derived here from the GPU case-wise
# model's low-rank proximity factors.
# NOTE(review): assumes the proximity matrix is approximated by A @ B.T and is
# scaled to [0, 1] - confirm against the rfx documentation.
lowrank_A, lowrank_B, _ = model_gpu_cw.get_lowrank_factors()
prox_approx = np.asarray(lowrank_A, dtype=float) @ np.asarray(lowrank_B, dtype=float).T
if prox_approx.shape != (len(y), len(y)):
    raise ValueError(
        f"Expected a {len(y)}x{len(y)} proximity from the low-rank factors, "
        f"got {prox_approx.shape}"
    )
prox_approx = 0.5 * (prox_approx + prox_approx.T)  # enforce symmetry

# Classical (Torgerson) MDS, treating 1 - proximity as squared distances
sq_dist = np.clip(1.0 - prox_approx, 0.0, None)
np.fill_diagonal(sq_dist, 0.0)
n_pts = sq_dist.shape[0]
centering = np.eye(n_pts) - np.full((n_pts, n_pts), 1.0 / n_pts)
gram = -0.5 * centering @ sq_dist @ centering
eigvals, eigvecs = np.linalg.eigh(gram)
top = np.argsort(eigvals)[::-1][:3]  # three largest eigenvalues
# Negative eigenvalues (non-Euclidean residue) are clipped to zero
mds_3d = eigvecs[:, top] * np.sqrt(np.clip(eigvals[top], 0.0, None))

# MDS coordinates (shape: n_samples x 3)
# Create 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=mds_3d[:, 0],
    y=mds_3d[:, 1],
    z=mds_3d[:, 2],
    mode='markers',
    marker=dict(
        size=5,
        color=y,  # color by class label
        colorscale='Viridis',
        opacity=0.8
    ),
    text=[f'Sample {i}' for i in range(len(y))]
)])

# update_layout returns the figure, so as the cell's last expression it is displayed
fig.update_layout(
    title='3D MDS Plot',
    scene=dict(
        xaxis_title='MDS 1',
        yaxis_title='MDS 2',
        zaxis_title='MDS 3'
    )
)

In [None]:
# Build the RFViz dashboard for the GPU case-wise model (low-rank proximity),
# save it as HTML, then embed that HTML in the notebook.
print("Generating RFViz visualization...")
rfviz_gpu_cw_path = "rfviz_gpu_cw3.html"
fig_gpu_cw = rf.rfviz(
    rf_model=model_gpu_cw,
    X=X, y=y,
    feature_names=FEATURE_NAMES,
    n_clusters=3, mds_k=3,
    title="GPU Casewise - Low-Rank Proximity",
    output_file=rfviz_gpu_cw_path,
    save_html=True,
    show_in_browser=False,
)

# Read the saved file back and hand its contents to IPython for inline display
with open(rfviz_gpu_cw_path, "r") as html_file:
    html_gpu_cw = html_file.read()
display(HTML(html_gpu_cw))

In [None]:
# GPU run, non-case-wise weighting, low-rank NF4 proximity.
# Results are stored in results['gpu_ncw'].
use_gpu = True
use_casewise = False
run_type = 'gpu_ncw'
mode = 'gpu' if use_gpu else 'cpu'
weighting = 'case-wise' if use_casewise else 'non-case-wise'

banner = '=' * 70
print(f"\n{banner}")
print(f"  {mode.upper()} {weighting.upper()} - LOW-RANK NF4 PROXIMITY")
print(banner)

# Forest configured for low-rank, NF4-quantized proximity
model_gpu_ncw = rf.RandomForestClassifier(
    # forest shape
    ntree=ntree, mtry=4, nsample=X.shape[0], nclass=n_classes,
    # execution
    use_gpu=use_gpu, batch_size=25, iseed=42,
    # outputs to compute
    compute_proximity=True,
    compute_importance=True,
    compute_local_importance=True,
    # weighting and proximity representation
    use_casewise=use_casewise,
    use_qlora=True, quant_mode='nf4', rank=32,
)

# Fit, measuring wall-clock time
print(f"\nTraining {ntree} trees with low-rank NF4 proximity...")
start_time = time.time()
model_gpu_ncw.fit(X, y)
elapsed = time.time() - start_time

# Out-of-bag metrics
oob_error = model_gpu_ncw.get_oob_error()
oob_preds = model_gpu_ncw.get_oob_predictions()

trees_per_sec = ntree / elapsed
print(f"Training time: {elapsed:.2f}s ({trees_per_sec:.1f} trees/sec)")
print(f"\n OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Factor shapes, plus a size estimate at 4 bytes per entry
A, B, actual_rank = model_gpu_ncw.get_lowrank_factors()
print(f"\n Low-Rank Factors: A={A.shape}, B={B.shape}, rank={actual_rank}")
bytes_per_mb = 1024**2
full_mem = n_samples**2 * 4 / bytes_per_mb
lr_mem = 2 * n_samples * actual_rank * 4 / bytes_per_mb
print(f"   Memory: {lr_mem:.4f} MB (vs {full_mem:.4f} MB full) = {full_mem/lr_mem:.1f}x compression")

# True labels against OOB predictions, both cast to int32
y_true_i32 = y.astype(np.int32)
oob_pred_i32 = oob_preds.astype(np.int32)
cm = rf.confusion_matrix(y_true_i32, oob_pred_i32)
print("\n Confusion Matrix:")
print_confusion_matrix(cm, n_classes)

# Record this run for the summary cell
results[run_type] = dict(
    mode=f"{mode} {weighting}",
    oob_error=oob_error,
    confusion_matrix=cm,
    time=elapsed,
    oob_preds=oob_preds,
    model=model_gpu_ncw,
)


In [None]:
# Build the RFViz dashboard for the GPU non-case-wise model (low-rank NF4
# proximity), save it as HTML, then embed that HTML in the notebook.
print("Generating RFViz visualization...")
rfviz_gpu_ncw_path = "rfviz_gpu_ncw.html"
fig_gpu_ncw = rf.rfviz(
    rf_model=model_gpu_ncw,
    X=X, y=y,
    feature_names=FEATURE_NAMES,
    n_clusters=3, mds_k=3,
    title="GPU Non-casewise - Low-Rank NF4 Proximity",
    output_file=rfviz_gpu_ncw_path,
    save_html=True,
    show_in_browser=False,
)

# Read the saved file back and hand its contents to IPython for inline display
with open(rfviz_gpu_ncw_path, "r") as html_file:
    html_gpu_ncw = html_file.read()
display(HTML(html_gpu_ncw))

In [None]:
# CPU run, non-case-wise weighting, full dense proximity matrix.
# Results are stored in results['cpu_ncw'].
use_gpu = False
use_casewise = False
run_type = 'cpu_ncw'
mode = 'gpu' if use_gpu else 'cpu'
weighting = 'case-wise' if use_casewise else 'non-case-wise'

banner = '=' * 70
print(f"\n{banner}")
print(f"  {mode.upper()} {weighting.upper()} - FULL DENSE PROXIMITY")
print(banner)

# Forest configured for the full proximity matrix (low-rank factorization off)
model_cpu_ncw = rf.RandomForestClassifier(
    # forest shape
    ntree=ntree, mtry=4, nsample=X.shape[0], nclass=n_classes,
    # execution
    use_gpu=use_gpu, iseed=42,
    # outputs to compute
    compute_proximity=True,
    compute_importance=True,
    compute_local_importance=True,
    # weighting and proximity representation
    use_casewise=use_casewise,
    use_qlora=False,
)

# Fit, measuring wall-clock time
print(f"\nTraining {ntree} trees with full proximity...")
start_time = time.time()
model_cpu_ncw.fit(X, y)
elapsed = time.time() - start_time

# Out-of-bag metrics
oob_error = model_cpu_ncw.get_oob_error()
oob_preds = model_cpu_ncw.get_oob_predictions()

trees_per_sec = ntree / elapsed
print(f"Training time: {elapsed:.2f}s ({trees_per_sec:.1f} trees/sec)")
print(f"\n OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Reshape the returned proximity to n_samples x n_samples and summarize it
prox = model_cpu_ncw.get_proximity_matrix()
prox = np.array(prox).reshape(n_samples, n_samples)
upper_triangle = np.triu_indices(n_samples, k=1)  # strictly above the diagonal
print(f"\n Full Proximity Matrix: {prox.shape}")
print(f"   Diagonal mean: {np.diag(prox).mean():.4f}")
print(f"   Off-diagonal mean: {prox[upper_triangle].mean():.4f}")

# True labels against OOB predictions, both cast to int32
y_true_i32 = y.astype(np.int32)
oob_pred_i32 = oob_preds.astype(np.int32)
cm = rf.confusion_matrix(y_true_i32, oob_pred_i32)
print("\n Confusion Matrix:")
print_confusion_matrix(cm, n_classes)

# Record this run for the summary cell
results[run_type] = dict(
    mode=f"{mode} {weighting}",
    oob_error=oob_error,
    confusion_matrix=cm,
    time=elapsed,
    oob_preds=oob_preds,
    model=model_cpu_ncw,
)


In [None]:
# Build the RFViz dashboard for the CPU non-case-wise model (full proximity),
# save it as HTML, then embed that HTML in the notebook.
print("Generating RFViz visualization...")
rfviz_cpu_ncw_path = "rfviz_cpu_ncw.html"
fig_cpu_ncw = rf.rfviz(
    rf_model=model_cpu_ncw,
    X=X, y=y,
    feature_names=FEATURE_NAMES,
    n_clusters=3, mds_k=3,
    title="CPU Non-casewise - Full Dense Proximity",
    output_file=rfviz_cpu_ncw_path,
    save_html=True,
    show_in_browser=False,
)

# Read the saved file back and hand its contents to IPython for inline display
with open(rfviz_cpu_ncw_path, "r") as html_file:
    html_cpu_ncw = html_file.read()
display(HTML(html_cpu_ncw))

In [None]:
# CPU run, case-wise weighting, full dense proximity matrix.
# Results are stored in results['cpu_cw'].
use_gpu = False
use_casewise = True
run_type = 'cpu_cw'
mode = 'gpu' if use_gpu else 'cpu'
weighting = 'case-wise' if use_casewise else 'non-case-wise'

banner = '=' * 70
print(f"\n{banner}")
print(f"  {mode.upper()} {weighting.upper()} - FULL DENSE PROXIMITY")
print(banner)

# Forest configured for the full proximity matrix (low-rank factorization off)
model_cpu_cw = rf.RandomForestClassifier(
    # forest shape
    ntree=ntree, mtry=4, nsample=X.shape[0], nclass=n_classes,
    # execution
    use_gpu=use_gpu, iseed=42,
    # outputs to compute
    compute_proximity=True,
    compute_importance=True,
    compute_local_importance=True,
    # weighting and proximity representation
    use_casewise=use_casewise,
    use_qlora=False,
)

# Fit, measuring wall-clock time
print(f"\nTraining {ntree} trees with full proximity...")
start_time = time.time()
model_cpu_cw.fit(X, y)
elapsed = time.time() - start_time

# Out-of-bag metrics
oob_error = model_cpu_cw.get_oob_error()
oob_preds = model_cpu_cw.get_oob_predictions()

trees_per_sec = ntree / elapsed
print(f"Training time: {elapsed:.2f}s ({trees_per_sec:.1f} trees/sec)")
print(f"\n OOB Error: {oob_error:.6f} ({oob_error*100:.2f}%)")
print(f"   OOB Accuracy: {(1-oob_error)*100:.2f}%")

# Reshape the returned proximity to n_samples x n_samples and summarize it
prox = model_cpu_cw.get_proximity_matrix()
prox = np.array(prox).reshape(n_samples, n_samples)
upper_triangle = np.triu_indices(n_samples, k=1)  # strictly above the diagonal
print(f"\n Full Proximity Matrix: {prox.shape}")
print(f"   Diagonal mean: {np.diag(prox).mean():.4f}")
print(f"   Off-diagonal mean: {prox[upper_triangle].mean():.4f}")

# True labels against OOB predictions, both cast to int32
y_true_i32 = y.astype(np.int32)
oob_pred_i32 = oob_preds.astype(np.int32)
cm = rf.confusion_matrix(y_true_i32, oob_pred_i32)
print("\n Confusion Matrix:")
print_confusion_matrix(cm, n_classes)

# Record this run for the summary cell
results[run_type] = dict(
    mode=f"{mode} {weighting}",
    oob_error=oob_error,
    confusion_matrix=cm,
    time=elapsed,
    oob_preds=oob_preds,
    model=model_cpu_cw,
)


In [None]:
# Build the RFViz dashboard for the CPU case-wise model (full proximity),
# save it as HTML, then embed that HTML in the notebook.
print("Generating RFViz visualization...")
rfviz_cpu_cw_path = "rfviz_cpu_cw.html"
fig_cpu_cw = rf.rfviz(
    rf_model=model_cpu_cw,
    X=X, y=y,
    feature_names=FEATURE_NAMES,
    n_clusters=3, mds_k=3,
    title="CPU Casewise - Full Dense Proximity",
    output_file=rfviz_cpu_cw_path,
    save_html=True,
    show_in_browser=False,
)

# Read the saved file back and hand its contents to IPython for inline display
with open(rfviz_cpu_cw_path, "r") as html_file:
    html_cpu_cw = html_file.read()
display(HTML(html_cpu_cw))

In [None]:
# Final report: one table row per stored run, followed by the case-wise vs
# non-case-wise OOB-error gap for the GPU and CPU pairs.
# Requires all four entries ('gpu_cw', 'gpu_ncw', 'cpu_cw', 'cpu_ncw') in `results`.
print("=" * 70)
print("  WINE CLASSIFICATION - PROXIMITY + RFVIZ SUMMARY")
print("  GPU: Low-Rank NF4 | CPU: Full Dense")
print("=" * 70)

# Summary comparison
print("\n" + "=" * 70)
print("  SUMMARY COMPARISON")
print("=" * 70)

print("\n OOB Errors:")
print(f"   {'Configuration':<25s} {'OOB Error':>12s} {'Accuracy':>12s} {'Time':>10s}")
print("   " + "-" * 60)
for res in results.values():
    print(f"   {res['mode']:<25s} {res['oob_error']:>12.6f} {(1-res['oob_error'])*100:>11.2f}% {res['time']:>9.2f}s")

print("\n Proximity Methods:")
print(f"   GPU models: Low-Rank NF4 (rank=32, ~2.8x compression)")
# Fixed: the multiplication sign was mojibake ("√ó") in the printed text
print(f"   CPU models: Full Dense Matrix ({n_samples}×{n_samples})")

print("\n Casewise vs Non-casewise Differences:")
gpu_diff = abs(results['gpu_cw']['oob_error'] - results['gpu_ncw']['oob_error'])
cpu_diff = abs(results['cpu_cw']['oob_error'] - results['cpu_ncw']['oob_error'])
print(f"   GPU:  {gpu_diff:.6f} ({gpu_diff*100:.2f}% difference)")
print(f"   CPU:  {cpu_diff:.6f} ({cpu_diff*100:.2f}% difference)")

# OOB-error gaps below 0.001 on both devices are reported as identical
if gpu_diff < 0.001 and cpu_diff < 0.001:
    print("\n     WARNING: Casewise and non-casewise produce IDENTICAL results!")
else:
    print("\n    Casewise and non-casewise produce DIFFERENT results (expected!)")

print("\n RFViz Visualizations: 4 interactive plots displayed above")
# Fixed: the bullet characters were mojibake ("‚Ä¢") in the printed text
print("   • Linked brushing: Select samples in any plot")
print("   • 3D MDS: Drag to rotate, scroll to zoom")

print("\n" + "=" * 70)
print("  TEST COMPLETE")
print("=" * 70)