## Classification

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
# Seed passed to every estimator in this cell that draws random numbers, so the
# model comparison produces the same scores on every run.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used for results and plots.
# Estimators constructed without arguments keep their library defaults.
models = {
    "Logistic Regression": LogisticRegression(),
    "k-Nearest Neighbors (k-NN)": KNeighborsClassifier(),
    "Support Vector Machine (SVM)": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting Machines (GBM)": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "AdaBoost (Adaptive Boosting)": AdaBoostClassifier(random_state=RANDOM_STATE),
    "Gaussian Naive Bayes": GaussianNB(),
    # NOTE(review): MultinomialNB is intended for non-negative (count-like)
    # features -- confirm X_train contains no negative values.
    "Multinomial Naive Bayes": MultinomialNB(),
    "Bernoulli Naive Bayes": BernoulliNB(),
    "Neural Networks": MLPClassifier(random_state=RANDOM_STATE),
    "Linear Discriminant Analysis (LDA)": LinearDiscriminantAnalysis(),
    "Quadratic Discriminant Analysis (QDA)": QuadraticDiscriminantAnalysis(),
}
# Evaluate each model
# X_train / y_train / X_test are expected to exist in the kernel already; they
# are not defined in this cell.
results = {}
for name, model in models.items():
    # Fitting happens in place, so the estimators stored in `models` stay
    # fitted after the loop finishes.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

NameError: name 'LogisticRegression' is not defined

In [None]:
# Calculate performance metrics
# The metric functions are imported here because no cell shown above imports them.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score every classifier, not just the last one trained: the estimators in
# `models` were fitted in place by the training loop, so each one can be asked
# for its test-set predictions again here.
for name, model in models.items():
    y_pred = model.predict(X_test)

    # 'weighted' averages the per-class scores by class support.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    results[name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    }

# Visualization
model_names = list(results.keys())
accuracy_scores = [results[model_name]['Accuracy'] for model_name in model_names]
f1_scores = [results[model_name]['F1 Score'] for model_name in model_names]

# Plot Accuracy
plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
bars_accuracy = plt.bar(model_names, accuracy_scores, color='skyblue', edgecolor='k')
plt.xticks(rotation=45, ha='right')
plt.ylabel('Accuracy')
plt.title('Accuracy for Classification Models')
# Highlight the best model for Accuracy
best_accuracy_idx = np.argmax(accuracy_scores)
bars_accuracy[best_accuracy_idx].set_color('orange')
# Same label styling as the F1 panel so the two subplots match.
plt.text(best_accuracy_idx, accuracy_scores[best_accuracy_idx] + 0.02, 'Best Model', ha='center', color='red')

# Plot F1 Score
plt.subplot(1, 2, 2)
bars_f1 = plt.bar(model_names, f1_scores, color='lightgreen', edgecolor='k')
plt.xticks(rotation=45, ha='right')
plt.ylabel('F1 Score')
plt.title('F1 Score for Classification Models')
# Highlight the best model for F1 Score
best_f1_idx = np.argmax(f1_scores)
bars_f1[best_f1_idx].set_color('orange')
plt.text(best_f1_idx, f1_scores[best_f1_idx] + 0.02, 'Best Model', ha='center', color='red')

# Show plots
plt.tight_layout()
plt.show()


# Regression


In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline

# Seed passed to every regressor in this cell that draws random numbers, so the
# comparison produces the same scores on every run.
RANDOM_STATE = 42

# Candidate regressors, keyed by the display name used for results and plots.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    # Note: PolynomialFeatures is not a regressor (it only transforms features
    # and has no predict()), so it is chained with a linear model here.
    "Polynomial Regression": make_pipeline(PolynomialFeatures(), LinearRegression()),
    "Support Vector Regression (SVR)": SVR(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=RANDOM_STATE),
    "Random Forest Regressor": RandomForestRegressor(random_state=RANDOM_STATE),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=RANDOM_STATE),
    "k-Nearest Neighbors Regressor (k-NN)": KNeighborsRegressor(),
}

# Fit each regressor on the training split and score it on the test split.
# X_train / y_train / X_test / y_test are expected to exist in the kernel
# already; they are not defined in this cell.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[name] = {
        "Mean Squared Error": mse,
        "R^2 Score": r2
    }


# Extract model names
# Stored under its own name so the `models` estimator dict is not overwritten
# by a list of strings.
model_names = list(results.keys())
# Extract MSE and R^2 Scores
mse_values = [results[model_name]['Mean Squared Error'] for model_name in model_names]
r2_values = [results[model_name]['R^2 Score'] for model_name in model_names]
# Find the best models: lowest error, highest R^2
best_mse_idx = np.argmin(mse_values)
best_r2_idx = np.argmax(r2_values)

# Plot MSE
plt.figure(figsize=(14, 6))
# Bar plot for Mean Squared Error
plt.subplot(1, 2, 1)
bars = plt.bar(model_names, mse_values, color='skyblue', edgecolor='k')
plt.xticks(rotation=45, ha='right')
plt.ylabel('Mean Squared Error')
plt.title('Mean Squared Error for Regression Models')
# Highlight the best model for MSE
bars[best_mse_idx].set_color('orange')
# Anchor the label directly on top of the bar (va='bottom') instead of using a
# fixed data-unit offset, which only suits one particular MSE scale.
plt.text(best_mse_idx, mse_values[best_mse_idx], 'Best Model', ha='center', va='bottom', color='red')

# Plot R^2 Score
plt.subplot(1, 2, 2)
bars_r2 = plt.bar(model_names, r2_values, color='lightgreen', edgecolor='k')
plt.xticks(rotation=45, ha='right')
plt.ylabel('R^2 Score')
plt.title('R^2 Score for Regression Models')
# Highlight the best model for R^2 Score
bars_r2[best_r2_idx].set_color('orange')
plt.text(best_r2_idx, r2_values[best_r2_idx] + 0.05, 'Best Model', ha='center', color='red')

# Show plots
plt.tight_layout()
plt.show()

# Clustering

In [1]:
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering, MeanShift, SpectralClustering
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# Define clustering models
# Seed for the algorithms below that start from a random initialisation, so
# repeated runs give the same cluster assignments.
RANDOM_STATE = 42

# Keyed by display name; estimators are only constructed here, not fitted.
clustering_models = {
    "KMeans": KMeans(random_state=RANDOM_STATE),
    "DBSCAN": DBSCAN(),
    "Agglomerative Clustering": AgglomerativeClustering(),
    "Mean Shift": MeanShift(),
    "Spectral Clustering": SpectralClustering(random_state=RANDOM_STATE),
    "Gaussian Mixture Model (GMM)": GaussianMixture(random_state=RANDOM_STATE),
}

# Note: StandardScaler is typically used to scale features before applying clustering algorithms, 
# but it is not a clustering model itself.


# Dimensionality reduction

In [3]:
from sklearn.decomposition import PCA, NMF, FastICA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap
from sklearn.preprocessing import StandardScaler
# Seed for the reducers below that accept a random_state, so repeated runs
# give the same embeddings/components.
RANDOM_STATE = 42

# Keyed by display name; estimators are only constructed here, not fitted.
dimensionality_reduction_models = {
    "Principal Component Analysis (PCA)": PCA(random_state=RANDOM_STATE),
    "Non-Negative Matrix Factorization (NMF)": NMF(random_state=RANDOM_STATE),
    "Fast Independent Component Analysis (FastICA)": FastICA(random_state=RANDOM_STATE),
    "Truncated Singular Value Decomposition (TruncatedSVD)": TruncatedSVD(random_state=RANDOM_STATE),
    "t-Distributed Stochastic Neighbor Embedding (t-SNE)": TSNE(random_state=RANDOM_STATE),
    "Isomap": Isomap(),
}