# Classification with C60.ai

This notebook demonstrates how to use C60.ai for a classification task using the Iris dataset.

## 1. Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from c60 import AutoML

# Seed NumPy's legacy global RNG so np.random-based randomness in this kernel is repeatable
np.random.seed(42)

# Plot configuration: apply seaborn's 'whitegrid' style, then enable the inline
# matplotlib backend so figures render directly below the cell that creates them
sns.set_style('whitegrid')
%matplotlib inline

## 2. Load and Explore the Data

In [None]:
# Fetch the Iris bunch and unpack the feature matrix and the integer class labels
data = load_iris()
X = data.data
y = data.target

# Assemble a DataFrame for plotting: one column per feature plus a 'target'
# column holding the species name looked up from each integer label
df = pd.DataFrame(X, columns=data.feature_names).assign(
    target=data.target_names[y]
)

# Report the size of the feature matrix and preview the first rows
print("Dataset shape:", X.shape)
df.head()

## 3. Data Visualization

In [None]:
# Pairplot to visualize pairwise relationships between features, colored by species.
# sns.pairplot is a figure-level function: it creates its own figure (sized via
# `height`), so no plt.figure() call precedes it -- doing so would only add a
# stray empty figure to the cell output without affecting the pairplot's size.
sns.pairplot(df, hue='target', height=2.5)
# The pairplot's figure is the current figure at this point, so the suptitle is
# attached to it; y > 1 lifts the title slightly above the top row of axes.
plt.suptitle('Iris Dataset - Pairplot', y=1.02)
plt.show()

## 4. Prepare the Data

In [None]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions equal in both partitions; random_state fixes the shuffle.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Confirm the sizes of the two partitions
print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

## 5. Initialize and Train the AutoML Model

In [None]:
# Settings for the AutoML search, gathered in one place so they are easy to tune
automl_settings = {
    'task': 'classification',
    'time_budget': 60,   # 1 minute
    'metric': 'accuracy',
    'n_jobs': -1,        # Use all available cores
    'random_state': 42,
}
automl = AutoML(**automl_settings)

# Fit on the training partition only; the test partition stays untouched until evaluation
print("Training model...")
automl.fit(X_train, y_train)
print("Training completed!")

## 6. Evaluate the Model

In [None]:
# Predicted class labels and predicted class probabilities for the held-out samples
y_pred = automl.predict(X_test)
y_proba = automl.predict_proba(X_test)

# Fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Per-class metrics, with rows labelled by species name instead of integer code
print("\nClassification Report:")
report = classification_report(y_test, y_pred, target_names=data.target_names)
print(report)

## 7. Visualize the Results

In [None]:
# Count matrix of true labels (rows) against predicted labels (columns)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # integer counts in each cell
    cmap='Blues',
    xticklabels=data.target_names,
    yticklabels=data.target_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

## 8. Feature Importance

In [None]:
# Plot feature importances when the selected model exposes them.
# Not every estimator has a `feature_importances_` attribute, so the check is
# explicit and the fallback branch says why no plot was produced instead of
# leaving the cell silently empty.
best_model = automl.best_estimator_
if hasattr(best_model, 'feature_importances_'):
    importances = best_model.feature_importances_
    # Pair each importance with its feature name, most important first
    feature_importance = pd.DataFrame({
        'Feature': data.feature_names,
        'Importance': importances
    }).sort_values('Importance', ascending=False)

    # Horizontal bar chart of the sorted importances
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Importance', y='Feature', data=feature_importance)
    plt.title('Feature Importance')
    plt.tight_layout()
    plt.show()
else:
    print(
        f"{type(best_model).__name__} does not expose 'feature_importances_'; "
        "skipping the feature importance plot."
    )

## 9. Save the Model

In [None]:
# Persist the fitted AutoML object to a file in the current working directory
model_path = 'iris_classifier.joblib'
automl.save(model_path)
print("Model saved as 'iris_classifier.joblib'")

# Reloading the model in a later session:
# from c60 import AutoML
# automl_loaded = AutoML.load('iris_classifier.joblib')
# predictions = automl_loaded.predict(X_test)