In [2]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


# Synthetic binary-classification problem: 100 samples described by two
# features, both of which are informative.
SEED = 42  # one seed shared by data generation and the split

X, y = make_classification(
    n_samples=100,
    n_classes=2,
    n_features=2,
    n_informative=2,  # every feature is informative
    n_redundant=0,    # no redundant features
    n_repeated=0,     # no repeated features
    random_state=SEED,
)

# Hold out 30% of the samples for evaluating each classifier below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=SEED,
)


# Logistic Regression
A statistical model used to predict the probability of a binary outcome. It estimates the parameters of a logistic function to output probabilities that are then mapped to class labels.

In [3]:
# Fit a logistic-regression classifier with default settings; fit() returns
# the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Logistic Regression Accuracy:", accuracy)


Logistic Regression Accuracy: 0.9666666666666667


# k-Nearest Neighbors (k-NN)
A non-parametric method that classifies a data point based on the majority class among its 'k' nearest neighbors in the feature space. It relies on a distance metric (e.g., Euclidean distance).

In [4]:
# Train a k-NN classifier that votes among the 5 closest training points.
model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("k-NN Accuracy:", accuracy)

k-NN Accuracy: 1.0


# Support Vector Machine (SVM)
A classifier that finds the optimal hyperplane separating the classes with the maximum margin. It can handle both linear and non-linear classification by using different kernel functions.

In [6]:
# Train a support vector classifier with a linear kernel.
model = SVC(kernel='linear').fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("SVM Accuracy:", accuracy)

SVM Accuracy: 1.0


# Decision Tree
A model that splits data into subsets based on feature values, creating a tree-like structure of decisions. Each node represents a decision rule, and each branch represents the outcome of that rule.

In [7]:
# Train a single decision tree with default hyperparameters.
# random_state is fixed because scikit-learn randomly permutes the features at
# every split; when several splits are equally good, the one chosen (and thus
# the fitted tree and its accuracy) can otherwise differ from run to run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, predictions))

Decision Tree Accuracy: 0.9666666666666667


# Random Forest
An ensemble method that constructs multiple decision trees and aggregates their predictions. It reduces overfitting and improves accuracy by averaging the results from multiple trees.

In [8]:
# Train a random forest of 100 trees.
# random_state is fixed because the forest draws a bootstrap sample for each
# tree and samples candidate features at each split; without a seed the
# reported accuracy is not reproducible under Restart & Run All.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, predictions))

Random Forest Accuracy: 0.9666666666666667


# Gradient Boosting
A boosting technique that builds models sequentially, with each new model correcting the errors of its predecessor. This improves performance by focusing on difficult cases.

In [10]:
# Train a gradient-boosted ensemble with 100 boosting stages.
# random_state is fixed because it seeds each tree built during boosting and
# the feature permutation at each split; without it the reported accuracy can
# vary between runs.
model = GradientBoostingClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
print("Gradient Boosting Accuracy:", accuracy_score(y_test, predictions))


Gradient Boosting Accuracy: 0.9666666666666667


# Naive Bayes
A probabilistic classifier based on Bayes' theorem with the assumption of feature independence given the class label. It calculates the probability of each class and selects the class with the highest probability.

In [11]:
# Fit a Gaussian naive Bayes classifier with default settings.
model = GaussianNB().fit(X_train, y_train)

# Score the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Naive Bayes Accuracy:", accuracy)

Naive Bayes Accuracy: 0.9666666666666667
