### Logistic Regression step by step

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import make_classification

### Cost function

In [2]:
def compute_cost_function(X, y, w, b):
    """Return the mean binary cross-entropy cost of a logistic model.

    For each sample the model output is ``sigmoid(X[i] . w + b)`` and the
    per-sample loss is ``-y*log(f) - (1 - y)*log(1 - f)``; the result is the
    average of that loss over all ``m`` samples.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Feature matrix, one row per sample.
    y : array-like of shape (m,)
        Target values (0 or 1), one per row of ``X``.
    w : array-like of shape (n,)
        Weight vector.
    b : scalar
        Bias term.

    Returns
    -------
    numpy.float64
        The average cost over the ``m`` samples.

    Raises
    ------
    ValueError
        If ``X`` contains no samples, or ``y`` does not hold exactly one
        target per row of ``X``.
    """
    X = np.asarray(X)
    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one sample")

    y = np.asarray(y, dtype=float).reshape(-1)
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} targets but X has {m} samples"
        )

    # Linear scores for every sample at once, flattened so they line up
    # element-wise with y.
    z = np.ravel(np.dot(X, w) + b)

    # Algebraically, -y*log(s(z)) - (1 - y)*log(1 - s(z)) == log(1 + e^z) - y*z.
    # np.logaddexp(0, z) evaluates log(1 + e^z) without overflowing and
    # without ever taking log(0), so the cost stays finite even when the
    # sigmoid would saturate to exactly 0 or 1.
    losses = np.logaddexp(0.0, z) - y * z
    return np.mean(losses)


In [None]:
# Generate a synthetic two-feature classification dataset; the fixed seed
# makes the generated samples the same on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=42,
)

# Hold out 20% of the samples as a test set (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the resulting split sizes and feature count.
print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Number of features: {X_train.shape[1]}")


### Implementation using sklearn


In [None]:
# Construct the estimator and fit it on the training split in one step
# (fit returns the fitted estimator itself).
model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# coef_ and intercept_ are indexed at 0 to print the single weight row and bias.
print(f"Model coefficients (w): {model.coef_[0]}")
print(f"Model intercept (b): {model.intercept_[0]}")


In [None]:
# Hard class predictions for the training and test splits.
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# Keep only column 1 of predict_proba for each split, i.e. the probability
# assigned to the second class listed in model.classes_.
y_train_proba, y_test_proba = (
    model.predict_proba(features)[:, 1] for features in (X_train, X_test)
)

# Show the first ten test samples side by side for a quick sanity check.
print("Sample predictions (first 10):")
print(f"True labels: {y_test[:10]}")
print(f"Predicted labels: {y_test_pred[:10]}")
print(f"Prediction probabilities: {y_test_proba[:10]}")


In [None]:
# Accuracy on both splits, to compare training and held-out performance.
train_accuracy, test_accuracy = (
    accuracy_score(y_train, y_train_pred),
    accuracy_score(y_test, y_test_pred),
)

print(f"Training accuracy: {train_accuracy:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")

# Per-class metrics on the test split.
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))

# Counts of true versus predicted labels on the test split.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))


In [None]:
def plot_decision_boundary(X, y, model, step=0.02):
    """Plot a model's predicted classes over a 2-D grid together with the data.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 1 on every side), ``model.predict`` is evaluated at each grid
    point, and the predictions are drawn as filled contours with the samples
    scattered on top.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; columns 0 and 1 are used as the plot axes.
    y : array-like
        Values used to colour the scattered samples.
    model : object
        Fitted estimator exposing ``predict`` for two-column input.
    step : float, optional
        Spacing between neighbouring grid points. Smaller values give a
        smoother boundary at the cost of more predictions. Defaults to 0.02.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be strictly positive")

    # Pad the data range so points on the edge are not drawn on the border.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict on every grid point (flattened to rows of [x, y]) and fold the
    # predictions back into the grid's shape for contour plotting.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(10, 6))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.RdYlBu)
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolors='black')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('Logistic Regression Decision Boundary')
    plt.colorbar(scatter)
    plt.show()

# Draw the fitted model's predicted regions together with the training samples.
plot_decision_boundary(X_train, y_train, model)


### Compare with manual cost function


In [None]:

# Evaluate the hand-written cost function at the parameters scikit-learn learned.
w_sklearn, b_sklearn = model.coef_[0], model.intercept_[0]
manual_cost = compute_cost_function(X_train, y_train, w=w_sklearn, b=b_sklearn)

print(f"Cost using sklearn parameters: {manual_cost:.4f}")
# NOTE: for a classifier, model.score reports mean accuracy rather than a
# loss, so this figure is not on the same scale as the cost printed above.
print(f"Model score (sklearn): {model.score(X_train, y_train):.4f}")
