In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import torch
import torch.nn as nn

# Seed PyTorch's RNG so the random parameter initialisation is repeatable.
SEED = 42
torch.manual_seed(SEED)

# Load the dataset and pull out the feature matrix and the label vector.
data = load_breast_cancer()
X, y = data["data"], data["target"]

# Hold out 25% of the samples for testing; the split is seeded with SEED.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size = 0.25,
  random_state = SEED,
)

In [None]:
# Display the label vector to inspect how the classes are encoded.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [None]:
# Learning rate shared by the manual gradient-descent loop and the SGD optimizer.
alpha = 3e-4

In [None]:
# Convert the NumPy train/test splits to float32 tensors for PyTorch.
X_t = torch.from_numpy(X_train).float()
y_t = torch.from_numpy(y_train).float()
X_te = torch.from_numpy(X_test).float()
y_te = torch.from_numpy(y_test).float()

# m: number of training samples, n: number of features.
m, n = X_t.shape

# One uniformly random weight per feature. The length comes from the data
# instead of a hard-coded 30 so the cell keeps working with other feature sets.
theta = torch.rand(n, dtype = torch.float32)


In [None]:
# Display the randomly initialised parameter vector.
theta

tensor([0.7047, 0.2545, 0.3994, 0.2122, 0.4089, 0.1481, 0.1733, 0.6659, 0.3514,
        0.8087, 0.3396, 0.1332, 0.4118, 0.2576, 0.3470, 0.0240, 0.7797, 0.1519,
        0.7513, 0.7269, 0.8572, 0.1165, 0.8596, 0.2636, 0.6855, 0.9696, 0.4295,
        0.4961, 0.3849, 0.0825])

In [None]:
## Initial parameters
# Store an independent copy of the starting weights. A plain
# `theta_init = theta` only aliases the tensor, so an in-place change to
# either name would silently change the other.
theta_init = theta.detach().clone()
theta_init

tensor([0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
        0.1332, 0.9346, 0.5936, 0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739,
        0.2666, 0.6274, 0.2696, 0.4414, 0.2969, 0.8317, 0.1053, 0.2695, 0.3588,
        0.1994, 0.5472, 0.0062])

In [None]:
## Train the logistic regression with full-batch gradient descent for 1000 epochs

epochs = 1000

# Restart from the stored initial weights so that re-running this cell gives
# the same result instead of continuing from an already trained theta.
theta = theta_init.clone()

# (alpha / m) * X_t.T is the same in every iteration, so build it once.
scaled_X_T = (alpha / m) * X_t.T

for e in range(epochs):
  # Gradient step: theta <- theta - alpha/m * X^T (sigmoid(X theta) - y)
  theta = theta - scaled_X_T @ (torch.sigmoid(X_t @ theta) - y_t)
theta

tensor([ 0.9026,  0.8531,  0.4552,  0.1514,  0.3904,  0.5996,  0.2544,  0.7928,
         0.9407,  0.1332,  0.9315,  0.5864,  0.8410,  0.0941,  0.7411,  0.4290,
         0.8849,  0.5738,  0.2664,  0.6274,  0.2897,  0.3517,  0.3309, -0.2771,
         0.1051,  0.2656,  0.3538,  0.1980,  0.5466,  0.0059])

In [None]:
## Turn predicted probabilities into class labels
def predict(X_te, theta):
  """Classify each row of X_te using the logistic-regression weights theta.

  The probability sigmoid(X_te @ theta) is computed for every row; a row is
  labelled 1 when that probability is at least 0.5 and 0 otherwise.

  Returns a NumPy integer array holding one 0/1 label per row of X_te.
  """
  probabilities = torch.sigmoid(torch.matmul(X_te, theta)).numpy()
  return (probabilities >= 0.5).astype(int)

# Score the manually trained weights on the held-out test split.
y_pred = predict(X_te = X_te, theta = theta)
print("Accuracy Score: ", accuracy_score(y_true = y_test, y_pred = y_pred))

Accuracy Score:  0.9300699300699301


## Implementación en PyTorch usando `nn.Module`

In [None]:
class LogReg(nn.Module):
  """Logistic regression expressed as a single bias-free linear layer.

  The layer maps n input features to one output. forward() returns raw logits
  (no sigmoid is applied), so it pairs with a logit-based loss such as
  nn.BCEWithLogitsLoss.

  Args:
    n: number of input features.
    init_weights: optional tensor (or array-like) with exactly n elements used
      as the starting weights. When omitted, the notebook-level `theta_init`
      is used, which matches the original behaviour.

  Raises:
    ValueError: if the initial weights do not contain exactly n elements.
  """

  def __init__(self, n, init_weights = None):
    super().__init__()
    self.fc = nn.Linear(n, 1, bias = False)

    if init_weights is None:
      init_weights = theta_init
    init_weights = torch.as_tensor(init_weights)
    if init_weights.numel() != n:
      raise ValueError(
        f"expected {n} initial weights, got {init_weights.numel()}"
      )

    # Copy the values into the layer's own storage. Assigning
    # `weight.data = theta_init.reshape(1, -1)` would make the parameter share
    # memory with theta_init, so every in-place optimizer step would also
    # overwrite the stored initial weights.
    with torch.no_grad():
      self.fc.weight.copy_(init_weights.reshape(1, -1))

  def forward(self, x):
    """Return the logits for inputs x whose last dimension has size n."""
    return self.fc(x)

# Build the model, the loss and the optimizer. BCEWithLogitsLoss applies the
# sigmoid internally, so the model is trained directly on its raw logits.
model = LogReg(n)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params = model.parameters(), lr = alpha)

# The loss is called with targets carrying a trailing axis, like the logits.
targets_col = y_t.unsqueeze(dim = -1)

# Full-batch training: each epoch performs one gradient step over all of X_t.
loss_history = list()
for e in range(epochs):
  model.train()
  optimizer.zero_grad()
  logits = model(X_t)
  loss = criterion(logits, targets_col)
  loss.backward()
  optimizer.step()
  loss_history.append(loss.item())

def predict_pytorch(X_te, net = None):
  """Predict 0/1 class labels for X_te with a model that outputs logits.

  Args:
    X_te: input tensor accepted by the model.
    net: module returning raw logits. Defaults to the notebook-level `model`,
      which matches the original single-argument behaviour.

  Returns:
    NumPy integer array of 0/1 labels with the same shape as the model output.
  """
  if net is None:
    net = model
  ## The model must be switched to evaluation mode
  net.eval()
  with torch.no_grad():
    # The model emits logits, not probabilities: apply the sigmoid before
    # comparing against the 0.5 decision threshold.
    p = torch.sigmoid(net(X_te)).numpy()
  y_pred = np.where(p >= 0.5, 1, 0)
  return y_pred

# Score the nn.Module model on the held-out test split.
y_pred = predict_pytorch(X_te = X_te)
print("Accuracy Score: ", accuracy_score(y_true = y_test, y_pred = y_pred))

Accuracy Score:  0.9300699300699301


In [None]:
## The weights learned by PyTorch closely match those from the manual gradient-descent loop
## (in the displayed outputs they differ by at most about 1e-3, so they are not bit-identical).
model.fc.weight.data

tensor([[ 0.9028,  0.8534,  0.4561,  0.1524,  0.3904,  0.5996,  0.2544,  0.7928,
          0.9407,  0.1332,  0.9315,  0.5864,  0.8410,  0.0932,  0.7411,  0.4290,
          0.8849,  0.5738,  0.2664,  0.6274,  0.2898,  0.3521,  0.3317, -0.2781,
          0.1051,  0.2656,  0.3538,  0.1980,  0.5466,  0.0060]])