In [1]:
# creamos el modelo (imput, output size, forward pass)
# creamos loss functions, optimizer
# training loop
#    - forward pass
#    - backward pass (gradients)
#    - update weights


# ***** sin direccionar datos procesos a GPU *******


#conda install scikit-learn


import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import subprocess
import time

start_time = time.time()


# the data is coming from the scikit-learn (sklearn) datasets module. 
# Specifically, the datasets.load_breast_cancer() function is used to load the Breast Cancer Wisconsin (Diagnostic) dataset.



# 0) data retrival and data preparation

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
n_samples, n_features = X.shape
print(f'descripción de los datos {bc.DESCR}')   #to get the description of the data

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state = 1234)
# dividimos los datos en train y test, 75% train, 25% test

sc = StandardScaler()
#crea una instancia del escalador que realizará la transformación

X_train = sc.fit_transform(X_train)
#  calcula la media y desviación estándar de cada característica en el conjunto de entrenamiento X_train 
# y transforma esos datos para que cada característica tenga media 0 y desviación estándar 1.

X_test = sc.transform(X_test)

#aplica la misma transformación al conjunto de prueba X_test, usando la media 
# y desviación estándar calculadas en X_train, para mantener la consistencia entre ambos conjuntos.

print("X_test Shape:", X_test.shape)
print("y_test Shape:", y_test.shape)
print("")



# 1) data preparation for pytorch

X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))
y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

# model definition
# Y= w*x  with sigmoid at the end


class LogisticRegression(nn.Module):

    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features,1)

    def forward(self, x):
        y_predicted = torch.sigmoid(self.linear(x))
        return y_predicted
    
model = LogisticRegression(n_features)
learning_rate = 0.00001
# Binary Cross-Entropy Loss: This loss function measures the difference between the predicted probabilities 
# (output of the model) and the actual binary labels (0 or 1). It is particularly suitable for tasks where the 
# output is a probability value between 0 and 1, such as when using a sigmoid activation function in the final layer of the model.

criterion = nn.BCELoss()

# Stochastic Gradient Descent (SGD): SGD is an optimization algorithm used to update the model's parameters (weights) 
# based on the gradients of the loss function. It is called "stochastic" because it updates the parameters using a randomly selected subset (mini-batch) of the training data, rather than the entire dataset at once.


optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

num_epochs = 70000


for epoch in range(num_epochs):

    # forward
    y_predicted =model(X_train)
    loss = criterion(y_predicted, y_train)

    # gradient
    loss.backward()

    # update weights
    optimizer.step()

    #zero gradients

    optimizer.zero_grad()

    if epoch % (num_epochs/10) == 0:


        print(f'iteracion: {epoch}, loss: {loss.item(): .4f}')


# Run nvidia-smi command and capture its output
#result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
#print(result.stdout)


with torch.no_grad():
    y_pred = model(X_test)
    y_pred_cls = y_pred.round()
    acc = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f' accuracy: {acc:.4f}')

end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")



descripción de los datos .. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    worst/largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 0 is Mean Radius, field
    10 is Radius SE, field 20

In [None]:
# creamos el modelo (imput, output size, forward pass)
# creamos loss functions, optimizer
# training loop
#    - forward pass
#    - backward pass (gradients)
#    - update weights

# *****  Direccionar datos procesos a GPU *******

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import subprocess
import time


start_time = time.time()


# To make sure your PyTorch model uses the GPU, you need to explicitly move the model and the data it processes to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# the data is coming from the scikit-learn (sklearn) datasets module. 
# Specifically, the datasets.load_breast_cancer() function is used to load the Breast Cancer Wisconsin (Diagnostic) dataset.

# 0) data retrival and data preparation

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
n_samples, n_features = X.shape
# print(bc.DESCR)   #to get the description of the data
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25, random_state = 1234)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

print("X_test Shape:", X_test.shape)
print("y_test Shape:", y_test.shape)


# fit_transform. Fit: This part of the process involves calculating the necessary statistics to perform the transformation. 
# It calculates the mean and standard deviation of the dataset.
# Transform: Based on the statistics calculated during the fit stage, this step scales and shifts the data so that it follows a specific distribution 
# The transform method is used to apply a previously computed transformation to the data. This method does not calculate any new transformation parameters but uses the ones computed from a previous fit or fit_transform operation.



# sc.fit_transform(X_train):
# This method is used to fit the scaler to the training data and transform the training data in a single step.
# It calculates the mean and standard deviation of the training data and uses these values to standardize the training data.
# This method should only be used on the training data to avoid data leakage.
# sc.transform(X_train):
# This method is used to transform the data using the parameters (mean and standard deviation) learned from the training data.
# It assumes that the scaler has already been fitted to the training data using sc.fit or sc.fit_transform.
# It applies the same scaling transformation to the input data (in this case, X_train) based on the previously calculated mean and standard deviation.

X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))
y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

#  Each batch of the data needs to be sent to the GPU

X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)


# model definition
# Y= w*x  whit sigmoid at the end

# @jitclass not working

class LogisticRegression(nn.Module):

    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features,1)

    # @jit not working
    def forward(self, x):
        y_predicted = torch.sigmoid(self.linear(x))
        return y_predicted
    
model = LogisticRegression(n_features).to(device)

learning_rate = 0.00001
# Binary Cross-Entropy Loss: This loss function measures the difference between the predicted probabilities 
# (output of the model) and the actual binary labels (0 or 1). It is particularly suitable for tasks where the 
# output is a probability value between 0 and 1, such as when using a sigmoid activation function in the final layer of the model.

criterion = nn.BCELoss()

# Stochastic Gradient Descent (SGD): SGD is an optimization algorithm used to update the model's parameters (weights) 
# based on the gradients of the loss function. It is called "stochastic" because it updates the parameters using a randomly selected subset (mini-batch) of the training data, rather than the entire dataset at once.


optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

num_epochs = 1000000


for epoch in range(num_epochs):

    # forward
    y_predicted =model(X_train)     # X_train is already on GPU
    loss = criterion(y_predicted, y_train)     # y_train is already on GPU

    # gradient
    loss.backward()

    # update weights
    optimizer.step()

    #zero gradients

    optimizer.zero_grad()

    if epoch % (num_epochs/10) == 0:


        print(f'iteracion: {epoch}, loss: {loss.item(): .4f}')


# Run nvidia-smi command and capture its output
result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
print(result.stdout)


with torch.no_grad():
    y_pred = model(X_test)
    y_pred_cls = y_pred.round()
    acc = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f' accuracy: {acc:.4f}')

end_time = time.time()
print(f"Execution time: {end_time - start_time} seconds")


In [2]:
# ***** ENTRENAMIENTO CON BATCHING EN GPU CON Pytorch *******

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import subprocess
import time

# ---------- Configuración ----------
batch_size = 128
learning_rate = 1e-5
num_epochs = 50000
print_every = num_epochs // 100

start_time = time.time()

# ---------- Selección del dispositivo ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ---------- Preparación de los datos ----------
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1234
)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convertir a tensores
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32)).view(-1, 1)
y_test = torch.from_numpy(y_test.astype(np.float32)).view(-1, 1)

# Crear DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Enviar test data a GPU
X_test = X_test.to(device)
y_test = y_test.to(device)

# ---------- Definición del modelo ----------
class LogisticRegression(nn.Module):
    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = LogisticRegression(X_train.shape[1]).to(device)

# ---------- Función de pérdida y optimizador ----------
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# ---------- Entrenamiento ----------
for epoch in range(num_epochs):
    for X_batch, y_batch in train_loader:
        # Enviar batch a GPU, si está disponible
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Forward
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    if (epoch + 1) % print_every == 0 or epoch == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# ---------- Evaluación ----------
with torch.no_grad():
    y_pred = model(X_test)
    y_pred_cls = y_pred.round()
    acc = y_pred_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'\nAccuracy on test set: {acc:.4f}')

try:
    result = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
    print("\nNVIDIA-SMI output:\n", result.stdout)
except FileNotFoundError:
    print("\n'nvidia-smi' no está disponible. Asegúrate de tener una GPU NVIDIA con drivers instalados.")

end_time = time.time()
print(f"Execution time: {end_time - start_time:.2f} seconds")


Using device: cpu
Epoch [1/50000], Loss: 0.6340
Epoch [500/50000], Loss: 0.5728
Epoch [1000/50000], Loss: 0.5291
Epoch [1500/50000], Loss: 0.4791
Epoch [2000/50000], Loss: 0.5585
Epoch [2500/50000], Loss: 0.5187
Epoch [3000/50000], Loss: 0.4338
Epoch [3500/50000], Loss: 0.4470
Epoch [4000/50000], Loss: 0.4568
Epoch [4500/50000], Loss: 0.4442
Epoch [5000/50000], Loss: 0.4782
Epoch [5500/50000], Loss: 0.4180
Epoch [6000/50000], Loss: 0.4356
Epoch [6500/50000], Loss: 0.3655
Epoch [7000/50000], Loss: 0.3742
Epoch [7500/50000], Loss: 0.4525
Epoch [8000/50000], Loss: 0.3647
Epoch [8500/50000], Loss: 0.4115
Epoch [9000/50000], Loss: 0.3804
Epoch [9500/50000], Loss: 0.3479
Epoch [10000/50000], Loss: 0.3314
Epoch [10500/50000], Loss: 0.3508
Epoch [11000/50000], Loss: 0.2299
Epoch [11500/50000], Loss: 0.2882
Epoch [12000/50000], Loss: 0.2836
Epoch [12500/50000], Loss: 0.2978
Epoch [13000/50000], Loss: 0.3273
Epoch [13500/50000], Loss: 0.2812
Epoch [14000/50000], Loss: 0.2934
Epoch [14500/50000],

In [None]:
# más general:

class ThreeLayerNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(ThreeLayerNetwork, self).__init__()
        # Capa 1: Transformación lineal + Tanh
        self.layer1 = nn.Linear(input_dim, hidden_dim1)
        # Capa 2: Transformación lineal + ReLU
        self.layer2 = nn.Linear(hidden_dim1, hidden_dim2)
        # Capa 3: Transformación lineal + LogSoftmax (logit)
        self.layer3 = nn.Linear(hidden_dim2, output_dim)
        # Función de activación final (logit)
        self.log_softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x):
        # Primera capa: Lineal + Tanh
        x = torch.tanh(self.layer1(x))
        # Segunda capa: Lineal + ReLU
        x = torch.relu(self.layer2(x))
        # Tercera capa: Lineal + LogSoftmax
        x = self.layer3(x)
        return self.log_softmax(x)
    
# Ejemplo de uso
# Hiperparámetros
input_size = 10
hidden1_size = 64
hidden2_size = 32
output_size = 3

# Crear modelo
model = ThreeLayerNetwork(
    input_dim=input_size,
    hidden_dim1=hidden1_size,
    hidden_dim2=hidden2_size,
    output_dim=output_size
)

# Ejemplo de entrada
x = torch.randn(5, input_size)  # Batch de 5 muestras

# Forward pass
output = model(x)
print(output.shape)  # Salida: torch.Size([5, 3])
