# Breast cancer
---

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import functional as F
from sklearn import datasets

In [None]:
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-z)))`` using
    ``np.logaddexp(0, -z)`` for the inner logarithm. The direct formula
    overflows inside ``np.exp`` for large negative ``z`` (emitting a
    RuntimeWarning); this form does not.

    Parameters
    ----------
    z : scalar or np.ndarray
        Input value(s).

    Returns
    -------
    Same shape as ``z``, with values in the closed interval [0, 1].
    """
    return np.exp(-np.logaddexp(0, -z))

In [None]:
# Load the breast cancer dataset through sklearn's `datasets` module.
# as_frame=True is requested; later cells call pandas methods such as
# .corr() and .describe() on `breast_cancer.data`.
breast_cancer = datasets.load_breast_cancer(as_frame=True)
# Print the description text attached to the loaded dataset.
print(breast_cancer.DESCR)

In [None]:
# Display the pairwise correlation matrix of the feature columns.
breast_cancer.data.corr()

In [None]:
# Pull the features and the labels out of their pandas containers as arrays.
X = breast_cancer.data.to_numpy()
y = breast_cancer.target.to_numpy()

# m = number of rows (samples), n = number of columns (features).
m, n = X.shape

# One print call, one item per line.
print(f"{X.shape=}", f"{y.shape=}", f"{m=}", f"{n=}", sep="\n")

In [None]:
# Turn the labels into a single-column 2-D array so they line up with the
# (m, 1) output of the model.
y = np.reshape(y, (-1, 1))
print(f"{y.shape=}")

In [None]:
# Fix the seed so the initial parameters are reproducible.
np.random.seed(0)
# Weights: one row with n entries; bias: a single value. Both are drawn from
# the standard normal distribution.
w = np.random.standard_normal(size=(1, n))
b = np.random.standard_normal(size=(1,))

In [None]:
def linear_combination(w, b, X):
    """Return the affine score ``X @ w.T + b`` for every row of ``X``.

    With ``w`` of shape (1, n), ``b`` of shape (1,) and ``X`` of shape (m, n),
    as used in this notebook, the result has shape (m, 1).
    """
    weights_as_column = w.T
    scores = X @ weights_as_column
    return scores + b

def model(w, b, X):
    """Logistic-regression forward pass: sigmoid of the linear combination."""
    logits = linear_combination(w, b, X)
    return sigmoid(logits)

The values resulting from the matrix multiplication are huge, and when they are fed into the sigmoid, it saturates:

In [None]:
# Display the model output for the first ten rows of X.
model(w, b, X)[:10]

The weights are drawn from a standard normal distribution, so they have a mean of 0 and a standard deviation of 1. Even though the weights are close to 0, the linear combination outputs huge values, so the cause must be the input features themselves, which are already very large. We can check this by examining the mean of each feature:

In [None]:
# Display the "mean" row of the summary statistics, i.e. one mean per feature.
breast_cancer.data.describe().loc["mean"]

**We need to normalize our input features.**

In [None]:
# Standardise every column: subtract its mean and divide by its standard
# deviation. keepdims=True keeps the statistics as (1, n) rows so they
# broadcast across the rows of X.
X = (X - np.mean(X, axis=0, keepdims=True)) / np.std(X, axis=0, keepdims=True)

In [None]:
# Display the raw scores (without the bias) for the first ten rows of X.
np.matmul(X, w.T)[:10]

In [None]:
# Display the model output for the first ten rows of X again, for comparison
# with the earlier preview.
model(w, b, X)[:10]

Much better, the model will be able to learn.

In [None]:
# Mean negative log-likelihood (binary cross-entropy) of the current model.
output = model(w, b, X)
# np.mean reduces the whole array in one vectorised call; the builtin sum()
# would instead iterate over the array row by row in Python.
nll = -np.mean(y * np.log(output) + (1 - y) * np.log(1 - output))
print(f"negative log likelihood = {nll.item():.3f}")

In [None]:
# Cross-check the hand-written loss against PyTorch: build a linear layer that
# uses the same NumPy weights and bias, then evaluate its binary cross-entropy.
linear = nn.Linear(in_features=n, out_features=1)
linear.weight = nn.Parameter(torch.from_numpy(w))
linear.bias = nn.Parameter(torch.from_numpy(b))

# Forward pass: linear layer followed by a sigmoid.
output_ = linear(torch.from_numpy(X)).sigmoid()

# The targets are cast to float64 before being handed to the loss.
loss = nn.functional.binary_cross_entropy(
    output_,
    torch.from_numpy(y.astype(np.float64)),
)
print(f"nll from pytorch: {loss.item():.3f}")

In [None]:
# Re-create the initial parameters so training starts from the same point
# every time this cell is run.
np.random.seed(0)
w = np.random.randn(1, n)
b = np.random.randn(1)

# Gradient-descent hyper-parameters.
learning_rate = 0.1
n_iterations = 300

losses = []
for _ in range(n_iterations):
    # Forward pass and loss with the current parameters.
    output = model(w, b, X)
    nll = -np.mean(y * np.log(output) + (1 - y) * np.log(1 - output))
    losses.append(nll.item())  # store plain floats, not 1-element arrays

    # Gradients of the mean negative log-likelihood.
    error = output - y
    dw = X.T @ error / m  # one row per weight, single column
    db = np.sum(error) / m  # scalar

    # Update in place; dw is transposed to match the (1, n) layout of w.
    w -= learning_rate * dw.T
    b -= learning_rate * db

# Plot the loss recorded at each iteration; the trailing semicolon keeps the
# notebook from echoing the value of the expression.
pd.DataFrame(losses).plot();