<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/ML_HYPERPARAMETER_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_iris, make_regression, make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import silhouette_score # Used to evaluate clustering

# --- Data Preparation ---
# 1. Classification data: Iris, split 70/30 into train and test sets.
iris = load_iris()
X_clf, y_clf = iris.data, iris.target
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf, y_clf, test_size=0.3, random_state=42
)

# 2. Regression data: synthetic, 100 samples x 4 features, split 70/30.
X_reg, y_reg = make_regression(n_samples=100, n_features=4, noise=10, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.3, random_state=42
)

# 3. Clustering/unsupervised data: 150 points drawn around 4 centers.
X_cluster, y_cluster = make_blobs(n_samples=150, centers=4, cluster_std=0.60, random_state=0)

# 4. Scaled data (required for KNN, PCA, MLP).
# `scaler` is fit on the training split only and then applied to the test
# split, so the test features are standardized with training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_clf)
X_test_scaled = scaler.transform(X_test_clf)

# PCA runs on the full feature matrix, so it gets its own scaler. Re-fitting
# `scaler` here would overwrite the training-split statistics that
# X_train_scaled / X_test_scaled were produced with, leaving `scaler`
# inconsistent with them for any later use.
pca_scaler = StandardScaler()
X_pca_scaled = pca_scaler.fit_transform(X_clf)


print("--- Machine Learning Hyperparameter Demo (Scikit-learn) ---")

# =================================================================
# 1. Linear Regression (Ridge: least squares with an L2 penalty)
# =================================================================
# alpha sets the strength of the L2 penalty; Ridge has no L1 term.
# fit() returns the estimator itself, so construction and fitting are chained.
model_lr = Ridge(alpha=10.0, solver='cholesky', fit_intercept=True).fit(
    X_train_reg, y_train_reg
)
# One print call, one argument per output line.
print(
    "\n1. Linear Regression (Ridge):",
    f"   Hyperparams: alpha={model_lr.alpha}, solver='{model_lr.solver}'",
    f"   R^2 score: {model_lr.score(X_test_reg, y_test_reg):.2f}",
    sep="\n",
)

# =================================================================
# 2. Logistic Regression
# =================================================================
# Trained on the unscaled Iris features with an L2 penalty.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# 'liblinear' solver for multiclass targets such as Iris - confirm against
# the installed version before upgrading.
model_logreg = LogisticRegression(
    penalty='l2',
    C=0.1,
    solver='liblinear',
    class_weight='balanced',
    random_state=42,
).fit(X_train_clf, y_train_clf)
# One print call, one argument per output line.
print(
    "\n2. Logistic Regression:",
    f"   Hyperparams: penalty='{model_logreg.penalty}', C={model_logreg.C}, "
    f"solver='{model_logreg.solver}', class_weight='{model_logreg.class_weight}'",
    f"   Accuracy: {model_logreg.score(X_test_clf, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 3. Naive Bayes (Gaussian)
# =================================================================
# var_smoothing is the portion of the largest feature variance that is added
# to every variance for numerical stability. It is not the Laplace/Lidstone
# `alpha` used by the discrete Naive Bayes variants.
model_nb = GaussianNB(var_smoothing=1e-08).fit(X_train_clf, y_train_clf)
# One print call, one argument per output line.
print(
    "\n3. Naive Bayes (Gaussian):",
    f"   Hyperparams: var_smoothing={model_nb.var_smoothing}",
    f"   Accuracy: {model_nb.score(X_test_clf, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 4. Decision Tree
# =================================================================
# Gini-impurity tree, limited to depth 4; a node needs at least 5 samples
# before it may be split.
model_dt = DecisionTreeClassifier(
    criterion='gini',
    max_depth=4,
    min_samples_split=5,
    random_state=42,
).fit(X_train_clf, y_train_clf)
# One print call, one argument per output line.
print(
    "\n4. Decision Tree:",
    f"   Hyperparams: criterion='{model_dt.criterion}', max_depth={model_dt.max_depth}, "
    f"min_samples_split={model_dt.min_samples_split}",
    f"   Accuracy: {model_dt.score(X_test_clf, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 5. Random Forest
# =================================================================
# 150 entropy-criterion trees, each capped at depth 8, with max_features='sqrt'.
model_rf = RandomForestClassifier(
    n_estimators=150,
    max_depth=8,
    max_features='sqrt',
    criterion='entropy',
    random_state=42,
).fit(X_train_clf, y_train_clf)
# One print call, one argument per output line.
print(
    "\n5. Random Forest:",
    f"   Hyperparams: n_estimators={model_rf.n_estimators}, max_depth={model_rf.max_depth}, "
    f"max_features='{model_rf.max_features}', criterion='{model_rf.criterion}'",
    f"   Accuracy: {model_rf.score(X_test_clf, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 6. Gradient Boosted Trees
# =================================================================
# 100 boosting stages of depth-3 trees with learning_rate=0.05.
model_gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.05,
    max_depth=3,
    min_samples_split=10,
    random_state=42,
).fit(X_train_clf, y_train_clf)
# One print call, one argument per output line.
print(
    "\n6. Gradient Boosted Trees:",
    f"   Hyperparams: n_estimators={model_gb.n_estimators}, learning_rate={model_gb.learning_rate}, "
    f"max_depth={model_gb.max_depth}, min_samples_split={model_gb.min_samples_split}",
    f"   Accuracy: {model_gb.score(X_test_clf, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 7. K-Nearest Neighbor (KNN)
# =================================================================
# Trained and scored on the standardized features (X_train_scaled /
# X_test_scaled) rather than the raw ones.
model_knn = KNeighborsClassifier(n_neighbors=7, algorithm='kd_tree').fit(
    X_train_scaled, y_train_clf
)
# One print call, one argument per output line.
print(
    "\n7. K-Nearest Neighbor:",
    f"   Hyperparams: n_neighbors={model_knn.n_neighbors}, algorithm='{model_knn.algorithm}'",
    f"   Accuracy: {model_knn.score(X_test_scaled, y_test_clf):.2f}",
    sep="\n",
)

# =================================================================
# 8. K-Means (Unsupervised)
# =================================================================
# Clusters the blob data into 4 groups; no labels are used for fitting.
model_kmeans = KMeans(
    n_clusters=4,
    init='k-means++',
    max_iter=500,
    n_init='auto',
    random_state=42,
).fit(X_cluster)
# The silhouette score evaluates the clustering from the points and their
# assigned labels alone.
score = silhouette_score(X_cluster, model_kmeans.labels_)
# One print call, one argument per output line.
print(
    "\n8. K-Means:",
    f"   Hyperparams: n_clusters={model_kmeans.n_clusters}, init='{model_kmeans.init}', "
    f"max_iter={model_kmeans.max_iter}",
    f"   Silhouette Score (higher is better): {score:.2f}",
    sep="\n",
)

# =================================================================
# 9. Principal Component Analysis (PCA)
# =================================================================
# PCA is a transformation; the 'explained_variance_ratio_' shows its effectiveness.
# The randomized SVD solver is stochastic, so random_state is pinned to keep
# the demo reproducible, matching the other seeded estimators in this script.
model_pca = PCA(n_components=2, svd_solver='randomized', random_state=42)
model_pca.fit(X_pca_scaled)
print("\n9. Principal Component Analysis:")
print(f"   Hyperparams: n_components={model_pca.n_components}, svd_solver='{model_pca.svd_solver}'")
# The component count in the message is read from the model so the text
# stays correct if n_components is changed above.
print(
    f"   Explained Variance Ratio: {model_pca.explained_variance_ratio_.sum():.2f} "
    f"(Variance captured by {model_pca.n_components} components)"
)

# =================================================================
# 10. Dense Neural Networks (MLPClassifier)
# =================================================================
# Trained and scored on the standardized features. fit() returns the
# estimator itself, so construction and fitting are chained.
model_nn = MLPClassifier(
    hidden_layer_sizes=(10, 5),  # two hidden layers: 10 units, then 5
    activation='relu',           # hidden-layer activation function
    solver='adam',               # weight optimizer
    alpha=0.001,                 # L2 regularization strength
    learning_rate_init=0.01,     # initial learning rate
    max_iter=500,                # upper bound on training iterations
    random_state=42,
).fit(X_train_scaled, y_train_clf)
# One print call, one argument per output line.
print(
    "\n10. Dense Neural Networks (MLP):",
    f"    Hyperparams: hidden_layer_sizes={model_nn.hidden_layer_sizes}, "
    f"activation='{model_nn.activation}', solver='{model_nn.solver}', "
    f"alpha={model_nn.alpha}, learning_rate_init={model_nn.learning_rate_init}",
    f"    Accuracy: {model_nn.score(X_test_scaled, y_test_clf):.2f}",
    sep="\n",
)

--- Machine Learning Hyperparameter Demo (Scikit-learn) ---

1. Linear Regression (Ridge):
   Hyperparams: alpha=10.0, solver='cholesky'
   R^2 score: 0.96

2. Logistic Regression:
   Hyperparams: penalty='l2', C=0.1, solver='liblinear', class_weight='balanced'
   Accuracy: 0.82

3. Naive Bayes (Gaussian):
   Hyperparams: var_smoothing=1e-08
   Accuracy: 0.98

4. Decision Tree:
   Hyperparams: criterion='gini', max_depth=4, min_samples_split=5
   Accuracy: 1.00

5. Random Forest:
   Hyperparams: n_estimators=150, max_depth=8, max_features='sqrt', criterion='entropy'
   Accuracy: 1.00

6. Gradient Boosted Trees:
   Hyperparams: n_estimators=100, learning_rate=0.05, max_depth=3, min_samples_split=10
   Accuracy: 1.00

7. K-Nearest Neighbor:
   Hyperparams: n_neighbors=7, algorithm='kd_tree'
   Accuracy: 1.00

8. K-Means:
   Hyperparams: n_clusters=4, init='k-means++', max_iter=500
   Silhouette Score (higher is better): 0.67

9. Principal Component Analysis:
   Hyperparams: n_components=