In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import numpy as np


from sklearn.metrics import accuracy_score


def sklearn_to_df(data_loader):
    """Split a scikit-learn style dataset object into pandas features and target.

    Args:
        data_loader: Object exposing ``data``, ``feature_names`` and ``target``
            attributes (for example the value returned by ``load_breast_cancer()``).

    Returns:
        A ``(features, target)`` tuple: a DataFrame whose columns are named after
        ``feature_names``, and a Series named ``'target'``.
    """
    features = pd.DataFrame(data_loader.data, columns=data_loader.feature_names)
    target = pd.Series(data_loader.target, name='target')
    return features, target

# Breast-cancer dataset: a classic binary classification problem (prediction from 30 features).
# Reference: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html
TEST_FRACTION = 0.2  # share of the rows held out for evaluation
SPLIT_SEED = 42      # fixed seed so the split is the same on every run

x, y = sklearn_to_df(load_breast_cancer())

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)


In [None]:
class LogisticRegression:
    """Binary logistic-regression classifier trained with full-batch gradient descent.

    Every epoch runs one forward pass over the whole training set, records the
    binary cross-entropy loss and the training accuracy, and takes a single
    gradient step on the weights and the bias.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        weights: 1-D array holding one coefficient per feature; ``None`` until
            ``fit`` has been called.
        bias: Scalar intercept.
        train_accuracies: Training accuracy of each epoch of the last ``fit``
            call, measured on the predictions made before that epoch's update.
        losses: Binary cross-entropy of each epoch of the last ``fit`` call.
    """

    # Added inside the logarithms so a saturated probability (exactly 0 or 1)
    # never produces log(0).
    _LOG_EPSILON = 1e-9

    def __init__(self, learning_rate=0.1):
        """Create an untrained model.

        Args:
            learning_rate: Gradient-descent step size. Defaults to 0.1.
        """
        self.learning_rate = learning_rate
        self.weights = None
        self.bias = 0.0
        self.train_accuracies = []
        self.losses = []

    def fit(self, x, y, epochs):
        """Train the model from scratch on ``x`` / ``y``.

        Args:
            x: 2-D array-like (ndarray or DataFrame), one row per sample.
            y: 1-D array-like of 0/1 labels, one per row of ``x``. Rows and
                labels are matched by position, not by pandas index.
            epochs: Number of full-batch gradient steps to run.

        Raises:
            ValueError: If ``x`` is not 2-D, is empty, or ``y`` does not hold
                exactly one label per row of ``x``.
        """
        features = np.asarray(x, dtype=float)
        labels = np.asarray(y)
        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError("x must be a non-empty 2-D array-like of shape (n_samples, n_features)")
        if labels.shape != (features.shape[0],):
            raise ValueError("y must be 1-D with one label per row of x")

        # Every call restarts training: parameters and history are reset.
        self.weights = np.zeros(features.shape[1])
        self.bias = 0.0
        self.train_accuracies = []
        self.losses = []

        for _ in range(epochs):
            pred = self._sigmoid(features @ self.weights + self.bias)
            loss = self.get_loss(labels, pred)
            error_w, error_b = self.get_gradients(features, labels, pred)
            self.update_model_parameters(error_w, error_b)

            # The recorded accuracy belongs to the predictions computed above,
            # i.e. to the parameters as they were before this epoch's update.
            pred_to_class = np.where(pred > 0.5, 1, 0)
            self.train_accuracies.append(accuracy_score(labels, pred_to_class))
            self.losses.append(loss)

    def _sigmoid(self, x):
        """Return the element-wise logistic function of ``x`` as an ndarray.

        Uses exp(-|x|), which never exceeds 1, so large magnitudes of either
        sign cannot overflow: for x >= 0 the result is 1 / (1 + exp(-x)), for
        x < 0 it is exp(x) / (1 + exp(x)).
        """
        z = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def _sigmoid_fn(self, x):
        """Scalar counterpart of ``_sigmoid`` with the same overflow-safe branches."""
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        exp_x = np.exp(x)
        return exp_x / (1 + exp_x)

    def get_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and probabilities.

        Args:
            y_true: 1-D array-like of 0/1 labels.
            y_pred: 1-D array-like of predicted probabilities, same length.
        """
        targets = np.asarray(y_true, dtype=float)
        probabilities = np.asarray(y_pred, dtype=float)
        # Term that is active for samples labelled 1 ...
        positive_term = targets * np.log(probabilities + self._LOG_EPSILON)
        # ... and the term that is active for samples labelled 0.
        negative_term = (1 - targets) * np.log(1 - probabilities + self._LOG_EPSILON)
        return -np.mean(positive_term + negative_term)

    def get_gradients(self, x, y_true, y_pred):
        """Return the gradients of the mean binary cross-entropy.

        Args:
            x: 2-D array-like of shape (n_samples, n_features).
            y_true: 1-D array-like of 0/1 labels.
            y_pred: 1-D array-like of predicted probabilities.

        Returns:
            ``(gradients_w, gradient_b)``: a 1-D array with one entry per
            feature, and a scalar. Both are averages over the samples, matching
            the mean taken in ``get_loss``.
        """
        features = np.asarray(x, dtype=float)
        residual = np.asarray(y_pred, dtype=float) - np.asarray(y_true, dtype=float)

        gradient_b = np.mean(residual)
        # X^T (p - y) sums over the samples; dividing by the sample count turns
        # it into the mean, so weights and bias are scaled consistently.
        gradients_w = features.T @ residual / residual.shape[0]

        return gradients_w, gradient_b

    def update_model_parameters(self, error_w, error_b):
        """Take one gradient-descent step of size ``self.learning_rate``.

        Args:
            error_w: Gradient with respect to the weights.
            error_b: Gradient with respect to the bias.
        """
        self.weights = self.weights - self.learning_rate * error_w
        self.bias = self.bias - self.learning_rate * error_b

    def predict(self, x):
        """Return hard 0/1 class predictions for the rows of ``x``.

        Args:
            x: 2-D array-like with the same number of columns as the training data.

        Returns:
            A list of ints: 1 where the predicted probability exceeds 0.5, else 0.

        Raises:
            AttributeError: If the model has no weights yet (``fit`` not called).
        """
        if getattr(self, "weights", None) is None:
            raise AttributeError("LogisticRegression has no weights yet; call fit() before predict()")
        scores = np.asarray(x, dtype=float) @ self.weights + self.bias
        probabilities = self._sigmoid(scores)
        return [1 if p > 0.5 else 0 for p in probabilities]

In [None]:
# Do NOT modify this cell
# Evaluation: train the classifier on the training split, then score its
# predictions against the labels of the held-out test split.
classifier = LogisticRegression()
classifier.fit(x_train, y_train, epochs=500)    # We train for 500 epochs
pred = classifier.predict(x_test)
accuracy = accuracy_score(y_test, pred)
print(accuracy) # Expected accuracy is >= 0.94

0.9473684210526315
