In [1]:
# Key Differences Between Classification and Regression
# -----------------------------------------------------
# 1. **Definition**:
#    - Classification: Predicts discrete labels (e.g., cat, dog, yes, no).
#    - Regression: Predicts continuous values (e.g., temperature, price).
#
# 2. **Output**:
#    - Classification: Categorical output (e.g., Class A, Class B).
#    - Regression: Numerical output (e.g., 42.7, 13.9).
#
# 3. **Use Cases**:
#    - Classification: Spam detection, image recognition, fraud detection.
#    - Regression: House price prediction, stock market forecasting, temperature estimation.

# Example Python Code for Classification and Regression
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import numpy as np

# Classification Example
# Build a synthetic two-class dataset (1000 samples, 10 features) and
# hold out 30% of it for testing; fixed seeds keep the run reproducible.
X_class, y_class = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)
(
    X_train_class,
    X_test_class,
    y_train_class,
    y_test_class,
) = train_test_split(X_class, y_class, test_size=0.3, random_state=42)

# Fit a Random Forest Classifier on the training split
# (fit() hands back the fitted estimator, so it can be chained).
clf = RandomForestClassifier(random_state=42).fit(X_train_class, y_train_class)

# Predict labels for the held-out split and score them against the truth
y_pred_class = clf.predict(X_test_class)
classification_accuracy = accuracy_score(y_test_class, y_pred_class)

# Show the first ten true/predicted labels, then the overall accuracy
print(
    "Classification Example:",
    f"Actual Labels:    {y_test_class[:10]}",
    f"Predicted Labels: {y_pred_class[:10]}",
    f"Classification Accuracy: {classification_accuracy:.2f}\n",
    sep="\n",
)

# Regression Example
# Build a synthetic dataset with a continuous target (1000 samples,
# 10 features, noise=0.1) and hold out 30% of it for testing.
X_reg, y_reg = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)
(
    X_train_reg,
    X_test_reg,
    y_train_reg,
    y_test_reg,
) = train_test_split(X_reg, y_reg, test_size=0.3, random_state=42)

# Fit a Random Forest Regressor on the training split
# (fit() hands back the fitted estimator, so it can be chained).
reg = RandomForestRegressor(random_state=42).fit(X_train_reg, y_train_reg)

# Predict targets for the held-out split and measure the mean squared error
y_pred_reg = reg.predict(X_test_reg)
regression_mse = mean_squared_error(y_test_reg, y_pred_reg)

# Show the first ten true/predicted values (2 decimals), then the MSE
print(
    "Regression Example:",
    f"Actual Values:    {np.round(y_test_reg[:10], 2)}",
    f"Predicted Values: {np.round(y_pred_reg[:10], 2)}",
    f"Regression Mean Squared Error: {regression_mse:.2f}\n",
    sep="\n",
)

# Summary
# - **Classification**: Used for problems where the output is a label/category.
# - **Regression**: Used for problems where the output is a real number.
# Both approaches use machine learning algorithms but are evaluated with different metrics
# (in this example: accuracy for classification, mean squared error for regression).


Classification Example:
Actual Labels:    [0 1 1 1 0 1 0 0 0 0]
Predicted Labels: [0 1 0 1 0 1 0 0 0 0]
Classification Accuracy: 0.89

Regression Example:
Actual Values:    [  42.67   75.01   -4.06 -295.72   44.43   21.68 -146.52  -60.56  -15.32
  176.65]
Predicted Values: [  -5.16   32.33  -90.78 -247.03  -37.43   43.2  -127.52  -13.61    5.06
  159.58]
Regression Mean Squared Error: 2799.71

