# In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


# ---------------------------------------------------------
# 1. Sigmoid function
# ---------------------------------------------------------
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The textbook formula evaluates ``np.exp(-z)``, which overflows for large
    negative ``z``. This version only ever exponentiates a non-positive
    number, so it cannot overflow.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``z``; every value lies in ``[0, 1]``.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in (0, 1]: it may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both are expressed through the same safe `decay` term.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a NumPy scalar and leaves
    # arrays of higher rank untouched.
    return result[()]


# ---------------------------------------------------------
# 2. Logistic Regression (NumPy Implementation)
# ---------------------------------------------------------
class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    After ``fit`` the learned parameters are available as ``weights`` (one
    coefficient per feature) and ``bias`` (the intercept).
    """

    def __init__(self, learning_rate=0.1, max_iters=3000):
        """Store the optimisation settings.

        Args:
            learning_rate: Step size applied to every gradient update.
            max_iters: Number of gradient-descent iterations run by ``fit``.
        """
        self.learning_rate = learning_rate
        self.max_iters = max_iters

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like holding one 0/1 label per sample. Column vectors of
                shape ``(n_samples, 1)`` are accepted and flattened.

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D, has no samples, or the number of
                labels does not match the number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: a (n, 1) column would otherwise broadcast against
        # the (n,) predictions into an (n, n) matrix and corrupt the gradients.
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels."
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        inv_n = 1 / n_samples  # loop-invariant averaging factor

        for _ in range(self.max_iters):
            predictions = sigmoid(np.dot(X, self.weights) + self.bias)
            error = predictions - y

            # Sample-averaged gradients with respect to weights and bias.
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self

    def predict(self, X):
        """Return 0/1 class labels for ``X``.

        A sample is labelled 1 when its predicted probability is at least 0.5.
        ``fit`` must have been called first so that ``weights`` and ``bias``
        exist.
        """
        linear = np.dot(X, self.weights) + self.bias
        probs = sigmoid(linear)
        return (probs >= 0.5).astype(int)


# ---------------------------------------------------------
# 3. Manual Metrics (NumPy Only)
# ---------------------------------------------------------
def accuracy_score(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Inputs are converted to flat NumPy arrays first. Without that, two plain
    lists would be compared as whole objects (one bool), and a column vector
    compared with a flat vector would broadcast into a matrix.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same number of elements.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` for empty input, in
        line with the zero-denominator convention of the other metrics here.

    Raises:
        ValueError: If the two inputs hold a different number of labels.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true has {y_true.size} labels but y_pred has {y_pred.size}."
        )
    if y_true.size == 0:
        return 0.0
    return np.mean(y_true == y_pred)


def precision_score(y_true, y_pred):
    """Return the precision for the positive class (label ``1``).

    Precision is ``true positives / predicted positives``. Inputs are
    converted to flat NumPy arrays so that plain lists and column vectors are
    compared element by element.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same number of elements.

    Returns:
        The precision as a float, or ``0.0`` when nothing was predicted
        positive (the ratio is undefined in that case).

    Raises:
        ValueError: If the two inputs hold a different number of labels.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true has {y_true.size} labels but y_pred has {y_pred.size}."
        )

    predicted_mask = y_pred == 1
    predicted_positive = np.sum(predicted_mask)
    if predicted_positive == 0:
        return 0.0
    true_positive = np.sum(predicted_mask & (y_true == 1))
    return true_positive / predicted_positive


def recall_score(y_true, y_pred):
    """Return the recall for the positive class (label ``1``).

    Recall is ``true positives / actual positives``. Inputs are converted to
    flat NumPy arrays so that plain lists and column vectors are compared
    element by element.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same number of elements.

    Returns:
        The recall as a float, or ``0.0`` when there are no actual positives
        (the ratio is undefined in that case).

    Raises:
        ValueError: If the two inputs hold a different number of labels.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true has {y_true.size} labels but y_pred has {y_pred.size}."
        )

    actual_mask = y_true == 1
    actual_positive = np.sum(actual_mask)
    if actual_positive == 0:
        return 0.0
    true_positive = np.sum(actual_mask & (y_pred == 1))
    return true_positive / actual_positive


# ---------------------------------------------------------
# 4. Generate Synthetic Binary Dataset (2D)
# ---------------------------------------------------------
# Settings for the synthetic data; n_features=2 meets the 2D-dataset
# requirement and the fixed seed keeps the generated samples reproducible.
dataset_kwargs = dict(
    n_samples=500,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)
X, y = make_classification(**dataset_kwargs)

# Hold out 30% of the samples for evaluation (seeded split).
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# ---------------------------------------------------------
# 5. Train Model
# ---------------------------------------------------------
model = LogisticRegressionScratch(learning_rate=0.1, max_iters=3000)
model.fit(X_train, y_train)

# Hard 0/1 predictions for the held-out samples
y_pred = model.predict(X_test)

# ---------------------------------------------------------
# 6. Evaluation (MANUAL METRICS)
# ---------------------------------------------------------
# Apply each hand-written metric to the same (truth, prediction) pair.
acc, prec, rec = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score)
)

# Report the learned parameters first, then the test-set metrics.
report = (
    ("Final Weights:", model.weights),
    ("Final Bias:", model.bias),
    ("Accuracy:", acc),
    ("Precision:", prec),
    ("Recall:", rec),
)
for label, value in report:
    print(label, value)


# ---------------------------------------------------------
# 7. Decision Boundary Plot
# ---------------------------------------------------------
# Pad the plotted area by one unit beyond the data on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# 200 x 200 grid of points covering the padded area.
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, 200),
    np.linspace(y_min, y_max, 200),
)

# Classify every grid point, then fold the labels back into the grid shape.
grid = np.c_[xx.ravel(), yy.ravel()]
Z = model.predict(grid).reshape(xx.shape)

# Use a dedicated figure so nothing is drawn onto whatever figure happens to
# be current (e.g. one left open by an earlier notebook cell).
fig, ax = plt.subplots()

# Z holds only the labels 0 and 1. Half-integer levels put each label strictly
# inside its own band, so the filled boundary falls midway between grid points
# of different classes instead of sitting exactly on the class-0 points.
ax.contourf(xx, yy, Z, levels=[-0.5, 0.5, 1.5], alpha=0.3)
ax.scatter(X[:, 0], X[:, 1], c=y, s=20)
ax.set_title("Decision Boundary - Logistic Regression (From Scratch)")
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
fig.savefig("decision_boundary.png")
plt.close(fig)