In [159]:
from sklearn.datasets import make_classification
import numpy as np
from pydantic import BaseModel, Field

X, y = make_classification(n_samples=1000, n_features=20)

In [160]:
class LogisticRegression(BaseModel, arbitrary_types_allowed=True):
    x: np.ndarray = Field(description="Independent Features")
    y: np.ndarray = Field(description="Dependent Feature")

    def sigmoid(self, z: np.ndarray):
        return 1 / (1 + np.exp(-z))

    def compute_gradient(self, X, y, theta):
        m = len(y)
        predictions = self.sigmoid(np.dot(X, theta))
        errors = predictions - y
        gradient = (1 / m) * np.dot(X.T, errors)
        return gradient

    def gradient_descent(self, X, y, learning_rate, iterations):
        theta = np.zeros((X.shape[1], 1))
        for _ in range(iterations):
            gradient = self.compute_gradient(X, y, theta)
            theta -= learning_rate * gradient
        return theta


# Generate synthetic data
X, y = make_classification(n_samples=1000, n_features=20)

# Add intercept term to X
X = np.hstack((np.ones((X.shape[0], 1)), X))

# Reshape y to be a column vector
y = y.reshape(-1, 1)

# Initialize and train the model
model = LogisticRegression(x=X, y=y)
theta = model.gradient_descent(X, y, learning_rate=0.01, iterations=1000)

In [161]:
predictions = 1 / (1 + np.exp(-np.dot(theta.T, X.T)))

In [162]:
predictions = np.where(predictions < 0.5, 0, 1)

In [163]:
loss = predictions.ravel() - y.ravel()

In [164]:
np.count_nonzero(loss == -1)

73

In [165]:
accuracy = (len(loss) - np.count_nonzero(loss == -1)) / len(loss)

In [166]:
accuracy * 100

92.7