<a href="https://colab.research.google.com/github/gredy/2021Z-DataVisualizationTechniques/blob/master/classifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Generate synthetic data
np.random.seed(42)
num_samples = 1000
X = np.random.rand(num_samples, 5)  # 5 features
y = np.random.choice(["M", "N"], size=num_samples)  # Binary labels

# Encode target variable (M = 0, N = 1)
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define classifiers
classifiers = {
    "Perceptron": Perceptron(),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "SVM": SVC(kernel='linear'),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Naïve Bayes": GaussianNB(),
}

# Train and evaluate models
results = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"\n{name} - Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Display results as DataFrame
results_df = pd.DataFrame(list(results.items()), columns=["Model", "Accuracy"])
display(results_df)
# This will display the results_df DataFrame in the output.




Perceptron - Accuracy: 0.5250
              precision    recall  f1-score   support

           M       0.53      0.32      0.40        99
           N       0.52      0.72      0.61       101

    accuracy                           0.53       200
   macro avg       0.53      0.52      0.50       200
weighted avg       0.53      0.53      0.51       200


KNN - Accuracy: 0.5000
              precision    recall  f1-score   support

           M       0.50      0.53      0.51        99
           N       0.51      0.48      0.49       101

    accuracy                           0.50       200
   macro avg       0.50      0.50      0.50       200
weighted avg       0.50      0.50      0.50       200


Logistic Regression - Accuracy: 0.5500
              precision    recall  f1-score   support

           M       0.57      0.35      0.44        99
           N       0.54      0.74      0.62       101

    accuracy                           0.55       200
   macro avg       0.56      0.55

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Gradient Boosting - Accuracy: 0.4700
              precision    recall  f1-score   support

           M       0.46      0.39      0.42        99
           N       0.48      0.54      0.51       101

    accuracy                           0.47       200
   macro avg       0.47      0.47      0.47       200
weighted avg       0.47      0.47      0.47       200


Naïve Bayes - Accuracy: 0.5000
              precision    recall  f1-score   support

           M       0.49      0.32      0.39        99
           N       0.50      0.67      0.58       101

    accuracy                           0.50       200
   macro avg       0.50      0.50      0.48       200
weighted avg       0.50      0.50      0.48       200



Unnamed: 0,Model,Accuracy
0,Perceptron,0.525
1,KNN,0.5
2,Logistic Regression,0.55
3,Decision Tree,0.48
4,Random Forest,0.46
5,SVM,0.505
6,Gradient Boosting,0.47
7,Naïve Bayes,0.5
