# TurboCat v0.3.0 - Test Notebook

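This notebook is a quick end-to-end check that TurboCat is installed and working correctly: it trains and evaluates a classifier, plots feature importance, compares tree modes, and fits a regressor.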

## 1. Installation Check

In [None]:
import turbocat as tc
# Report the installed package version; a failure here means the import
# above did not produce a usable module.
print("TurboCat version: {}".format(tc.__version__))

## 2. Generate Sample Data

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Settings for the synthetic classification dataset. The seed is fixed so
# the notebook produces the same data on every run.
dataset_config = {
    "n_samples": 5000,
    "n_features": 20,
    "n_informative": 10,
    "n_redundant": 5,
    "n_clusters_per_class": 2,
    "random_state": 42,
}
X, y = make_classification(**dataset_config)

# Cast features and labels to float32 (noted in this notebook as a
# TurboCat requirement).
X, y = X.astype(np.float32), y.astype(np.float32)

# Hold out 20% of the rows for testing, with a fixed seed.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

print("Train: {}, Test: {}".format(X_train.shape, X_test.shape))

## 3. Train TurboCat Classifier

In [None]:
# Hyperparameters for the demo classifier, collected in one place.
classifier_params = dict(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    mode="small",  # Regular trees for best quality
    verbosity=1,
)
model = tc.TurboCatClassifier(**classifier_params)

# Fit on the training split and report how many trees were built.
print("Training...")
model.fit(X_train, y_train)
print("\nTrained {} trees".format(model.n_trees))

## 4. Evaluate Model

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Hard class predictions and per-class probabilities on the held-out split.
y_pred = model.predict(X_test)
proba_matrix = model.predict_proba(X_test)
y_proba = proba_matrix[:, 1]  # column 1 = probability of class 1

# Accuracy uses the hard labels; AUC uses the class-1 probabilities.
accuracy = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_proba)

print("Accuracy: {:.4f}".format(accuracy))
print("AUC-ROC:  {:.4f}".format(auc))
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

## 5. Feature Importance

In [None]:
import matplotlib.pyplot as plt

# Importance scores as reported by the trained classifier.
importance = model.feature_importance()

# Horizontal bar chart: one bar per entry, positioned by its index.
fig, ax = plt.subplots(figsize=(10, 6))
positions = range(len(importance))
ax.barh(positions, importance)
ax.set_xlabel('Importance')
ax.set_ylabel('Feature Index')
ax.set_title('TurboCat Feature Importance')
fig.tight_layout()
plt.show()

## 6. Compare Tree Modes

In [None]:
import time

# Benchmark each tree mode: train a fresh classifier, time fit and
# predict_proba, and report the test-set AUC.
print("Mode Comparison:")
print("="*60)

# Human-readable description of the tree type used by each benchmarked mode.
tree_types = {"small": "Regular trees", "large": "Symmetric trees"}

for mode, tree_type in tree_types.items():
    # Use benchmark-local names so the classifier trained earlier in the
    # notebook (`model`) and its results (`y_proba`, `auc`) are not
    # overwritten. Otherwise re-running the evaluation or feature-importance
    # cells afterwards would silently use the last benchmarked model.
    mode_model = tc.TurboCatClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        mode=mode,
        verbosity=0
    )

    # Train. perf_counter() is a monotonic, high-resolution clock intended
    # for measuring short durations; time.time() can jump with system clock
    # adjustments and may be too coarse for millisecond-scale timings.
    t0 = time.perf_counter()
    mode_model.fit(X_train, y_train)
    train_time = time.perf_counter() - t0

    # Predict class-1 probabilities on the held-out split.
    t0 = time.perf_counter()
    mode_proba = mode_model.predict_proba(X_test)[:, 1]
    pred_time = time.perf_counter() - t0

    mode_auc = roc_auc_score(y_test, mode_proba)

    # Train time is shown in seconds, prediction time in milliseconds.
    print(f"{mode:6s} ({tree_type:16s}): AUC={mode_auc:.4f}, Train={train_time:.3f}s, Predict={pred_time*1000:.2f}ms")

## 7. Regression Example

In [None]:
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error, r2_score

# Settings for the synthetic regression dataset, with a fixed seed.
regression_config = dict(
    n_samples=3000,
    n_features=15,
    n_informative=10,
    noise=10,
    random_state=42,
)
X_reg, y_reg = make_regression(**regression_config)

# Same float32 cast as applied to the classification data.
X_reg, y_reg = X_reg.astype(np.float32), y_reg.astype(np.float32)

# Hold out 20% of the rows for testing.
split_r = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)
X_train_r, X_test_r, y_train_r, y_test_r = split_r

# Fit the regressor quietly (verbosity=0).
reg = tc.TurboCatRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    verbosity=0,
)
reg.fit(X_train_r, y_train_r)

# Score predictions on the held-out split.
y_pred_r = reg.predict(X_test_r)
mse = mean_squared_error(y_test_r, y_pred_r)
r2 = r2_score(y_test_r, y_pred_r)
rmse = np.sqrt(mse)

print("MSE:  {:.4f}".format(mse))
print("RMSE: {:.4f}".format(rmse))
print("R2:   {:.4f}".format(r2))

## 8. Summary

In [None]:
# Closing banner plus a short reference of the available tree modes.
banner = "="*50
summary_lines = [
    banner,
    f"TurboCat v{tc.__version__} - All tests passed!",
    banner,
    "",
    "Tree modes:",
    "  - 'small': Regular trees, best quality",
    "  - 'large': Symmetric trees, faster inference",
    "  - 'auto':  Automatic selection based on data size",
]
for summary_line in summary_lines:
    print(summary_line)