In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### load and preprocess dataset

In [49]:
data = load_breast_cancer(return_X_y=True)

# add a column with value 1 for w0 as the firs column
X = np.hstack((np.ones((len(y), 1)), X))
X = StandardScaler().fit_transform(X)

In [50]:
# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Building A Logistic Regression in Python

In [51]:
class LogisticRegression:
    
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        
    def fit(self, X, y):
        num_samples, num_features = X.shape
        
        # Initialize weights and bias
        self.weights = np.zeros(num_features)
        
        # Gradient Descent
        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights)
            y_predicted = self.sigmoid(linear_model)
            
        # Update weights and bias using gradient descent
        dw = (1 / num_samples) * np.dot(X.T, (y_predicted - y))
        self.weights -= self.learning_rate * dw
        
    def predict(self, X):
        linear_model = np.dot(X, self.weights)
        y_predicted = self.sigmoid(linear_model)
        y_predicted_cls = [1 if p > 0.5 else 0 for p in y_predicted]
        return y_predicted_cls
    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

In [52]:
model = LogisticRegression()

In [53]:
# fit the training data
model.fit(X_train, y_train)

In [54]:
# Predict the lablesfor the testing data
y_pred = model.predict(X_test)

In [55]:
# Evaluate the model
accuracy = np.mean(y_pred == y_test)
print(f"accuracy = {accuracy}")

accuracy = 0.9649122807017544


## SKLearn Logistic Regression

In [56]:
from sklearn.linear_model import LogisticRegression as SKLogisticRegression
from sklearn.metrics import accuracy_score

In [57]:
# Create an instance and fit the training data
SKmodel = SKLogisticRegression()
SKmodel.fit(X_train, y_train)

LogisticRegression()

In [58]:
# Predict the labels for the testing set
SKy_pred = SKmodel.predict(X_test)

In [59]:
# Getting the accuracy of model
accuracy_score(y_test, SKy_pred)

0.9736842105263158