# **Diplomatura en Ciencia de Datos - UNNE - 2024**
### Módulo 4: Aprendizaje Automático
### Clase 6: Redes Neuronales Artificiales con técnicas de regularización

## **Dropout**

Vamos a comparar el entrenamiento de una red con y sin dropout.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml
from torch.utils.data import DataLoader, TensorDataset
import random

# Seed every random number generator used below (PyTorch, the standard
# library and NumPy) so repeated runs start from the same state.
torch.manual_seed(21)
random.seed(54)
np.random.seed(54)


# Download the Heart Disease dataset from OpenML and separate the feature
# columns from the 'target' column.
data = fetch_openml(name="heart-disease", version=1)
X = data['data'].drop(columns=['target'])
y = data['data']['target']

# Scale each feature to zero mean and unit variance.
# NOTE(review): the scaler is fitted on the full dataset, before any
# cross-validation split, so held-out folds influence the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Float32 tensors for both the features and the labels.
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.float32)

# Neural Network (without Dropout)
class HeartNet(nn.Module):
    """Fully connected binary classifier without dropout (baseline model).

    Architecture: ``input_dim -> 64 -> 32 -> 1`` with ReLU after the two
    hidden layers. ``forward`` returns raw logits (no sigmoid is applied).

    Args:
        input_dim: Number of input features. When omitted it falls back to
            ``X_tensor.shape[1]`` (the module-level feature tensor), so the
            existing ``HeartNet()`` calls keep working unchanged.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # global feature tensor, as the original constructor did.
            input_dim = X_tensor.shape[1]
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return one logit per row of ``x`` (output has a trailing dim of 1)."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the output layer: the caller receives logits.
        x = self.fc3(x)
        return x

# Binary cross-entropy computed directly on logits
loss_fn = nn.BCEWithLogitsLoss()

# Mini-batch size, learning rate, and epochs
batch_size = 32
learning_rate = 0.1
n_epochs = 100

# 10-Fold Cross Validation.
# A fixed random_state makes every call to kf.split() return the same folds,
# so any later experiment that reuses `kf` is evaluated on identical
# partitions. With random_state=None each call would reshuffle differently and
# the per-fold accuracies of two experiments would not be comparable.
kf = KFold(n_splits=10, shuffle=True, random_state=54)
fold_results = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X_tensor)):
    print(f'Fold {fold + 1}')

    # Fresh, randomly initialised network for every fold
    modelo = HeartNet()

    # Split data
    X_train, X_test = X_tensor[train_idx], X_tensor[test_idx]
    y_train, y_test = y_tensor[train_idx], y_tensor[test_idx]

    # Create DataLoader for mini-batch SGD
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training process with mini-batch SGD
    modelo.train()
    for epoch in range(n_epochs):
        for X_batch, y_batch in train_loader:
            # Forward pass. squeeze(1) removes only the feature axis, so a
            # final batch holding a single sample keeps shape (1,) and still
            # matches y_batch (a bare squeeze() would collapse it to 0-d).
            y_pred = modelo(X_batch).squeeze(1)
            loss = loss_fn(y_pred, y_batch)

            # Backward pass and weight update
            modelo.zero_grad()  # Clear previous gradients
            loss.backward()  # Compute gradients

            # Plain SGD step, applied by hand outside of autograd tracking
            with torch.no_grad():
                for param in modelo.parameters():
                    param -= learning_rate * param.grad

    # Evaluate on the held-out fold and calculate accuracy
    modelo.eval()
    with torch.no_grad():
        y_test_pred = torch.sigmoid(modelo(X_test).squeeze(1))
        y_test_pred = (y_test_pred >= 0.5).float()  # Threshold probabilities at 0.5
        accuracy = (y_test_pred == y_test).float().mean().item()  # Fraction of correct predictions
        fold_results.append(accuracy)
        print(f'Accuracy for fold {fold + 1}: {accuracy:.4f}')

# Final accuracy
print(f'Mean Accuracy across all folds: {np.mean(fold_results):.4f}')


In [None]:
# Neural Network with Dropout
class HeartNetDropout(nn.Module):
    """Same architecture as the baseline network, with dropout after each hidden layer.

    Architecture: ``input_dim -> 64 -> dropout -> 32 -> dropout -> 1`` with
    ReLU before each dropout. ``forward`` returns raw logits.

    Dropout is only active while the module is in training mode; call
    ``.eval()`` before computing test predictions.

    Args:
        input_dim: Number of input features. When omitted it falls back to
            ``X_tensor.shape[1]`` (the module-level feature tensor), so the
            existing ``HeartNetDropout()`` calls keep working unchanged.
        dropout_p: Probability of zeroing each hidden activation
            (default 0.2, the value that was previously hard-coded).
    """

    def __init__(self, input_dim=None, dropout_p=0.2):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: size the first layer from the
            # global feature tensor, as the original constructor did.
            input_dim = X_tensor.shape[1]
        self.fc1 = nn.Linear(input_dim, 64)
        self.dropout1 = nn.Dropout(dropout_p)  # Dropout layer
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(dropout_p)  # Dropout layer
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return one logit per row of ``x`` (output has a trailing dim of 1)."""
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        # No activation on the output layer: the caller receives logits.
        x = self.fc3(x)
        return x

# Repeating the training and evaluation process with the dropout model
fold_results_dropout = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X_tensor)):
    print(f'Fold {fold + 1} (Dropout)')

    # Fresh, randomly initialised dropout network for every fold
    model = HeartNetDropout()

    # Split data
    X_train, X_test = X_tensor[train_idx], X_tensor[test_idx]
    y_train, y_test = y_tensor[train_idx], y_tensor[test_idx]

    # Create DataLoader for mini-batch SGD
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training process with mini-batch SGD.
    # Training mode keeps the dropout layers active while fitting.
    model.train()
    for epoch in range(n_epochs):
        for X_batch, y_batch in train_loader:
            # Forward pass. squeeze(1) removes only the feature axis, so a
            # final batch holding a single sample keeps shape (1,) and still
            # matches y_batch (a bare squeeze() would collapse it to 0-d).
            y_pred = model(X_batch).squeeze(1)
            loss = loss_fn(y_pred, y_batch)

            # Backward pass and weight update
            model.zero_grad()  # Clear previous gradients
            loss.backward()  # Compute gradients

            # Plain SGD step, applied by hand outside of autograd tracking
            with torch.no_grad():
                for param in model.parameters():
                    param -= learning_rate * param.grad

    # Evaluate on the held-out fold and calculate accuracy.
    # eval() switches the dropout layers off; without it units would still be
    # dropped at random during evaluation, making the reported accuracy noisy
    # and pessimistic.
    model.eval()
    with torch.no_grad():
        y_test_pred = torch.sigmoid(model(X_test).squeeze(1))
        y_test_pred = (y_test_pred >= 0.5).float()  # Threshold probabilities at 0.5
        accuracy = (y_test_pred == y_test).float().mean().item()  # Fraction of correct predictions
        fold_results_dropout.append(accuracy)
        print(f'Accuracy for fold {fold + 1} (Dropout): {accuracy:.4f}')

# Final accuracy with Dropout
print(f'Mean Accuracy across all folds (Dropout): {np.mean(fold_results_dropout):.4f}')


## **Inicialización Xavier**

Vamos a comparar dos modelos, con y sin inicialización Xavier / Glorot, para funciones de activación distintas.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fix PyTorch's RNG so the weight initialisations below are repeatable.
torch.manual_seed(771142090864900)

# Two-moons toy problem (binary classification), split 80/20.
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Float32 features and int64 class labels as PyTorch tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Define a simple feedforward neural network
class SimpleNN(nn.Module):
    """One-hidden-layer network with a selectable activation and optional Xavier init.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.
        output_dim: Number of output logits.
        activation: ``'sigmoid'`` or ``'tanh'``; applied after the hidden layer.
        xavier_init: When true, the weights of both linear layers are
            re-initialised with Xavier/Glorot uniform initialisation; the
            biases keep ``nn.Linear``'s default initialisation.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
            (Previously an unknown name left ``self.activation`` undefined and
            the failure only surfaced later, inside ``forward``.)
    """

    def __init__(self, input_dim, hidden_dim, output_dim, activation='sigmoid', xavier_init=False):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

        # Choose activation function
        if activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'sigmoid' or 'tanh'"
            )

        # Apply Xavier initialization if specified
        if xavier_init:
            torch.nn.init.xavier_uniform_(self.fc1.weight)
            torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Return the output logits for ``x`` (no softmax is applied)."""
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x

# Train the model
def train_model(model, X_train, y_train, X_test, y_test, num_epochs=100, learning_rate=0.01):
    """Fit ``model`` with full-batch SGD and track the loss on both splits.

    Each epoch performs a single optimizer step on the whole training set
    using cross-entropy, then evaluates the updated model on the test set.
    A progress line is printed every 10 epochs.

    Returns:
        A ``(train_losses, test_losses)`` pair of lists with one float per
        epoch. The training loss is the value measured before that epoch's
        update; the test loss is measured after it.
    """
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(), lr=learning_rate)

    train_losses = []
    test_losses = []

    for step in range(1, num_epochs + 1):
        # One gradient step on the full training set
        model.train()
        sgd.zero_grad()
        batch_loss = loss_fn(model(X_train), y_train)
        batch_loss.backward()
        sgd.step()
        train_value = batch_loss.item()
        train_losses.append(train_value)

        # Held-out loss of the freshly updated model, without autograd
        model.eval()
        with torch.no_grad():
            test_value = loss_fn(model(X_test), y_test).item()
        test_losses.append(test_value)

        if step % 10 == 0:
            print(f'Epoch [{step}/{num_epochs}], Loss: {train_value:.4f}, Test Loss: {test_value:.4f}')

    return train_losses, test_losses

# Hyperparameters: 2 input features, 10 hidden units, 2 output classes
input_dim, hidden_dim, output_dim = 2, 10, 2
num_epochs = 500
learning_rate = 0.01

# Build the four networks up front (sigmoid pair first, then the tanh pair) so
# the order in which random initial weights are drawn stays fixed.
model_no_xavier_sigmoid = SimpleNN(input_dim, hidden_dim, output_dim, activation='sigmoid', xavier_init=False)
model_xavier_sigmoid = SimpleNN(input_dim, hidden_dim, output_dim, activation='sigmoid', xavier_init=True)

model_no_xavier_tanh = SimpleNN(input_dim, hidden_dim, output_dim, activation='tanh', xavier_init=False)
model_xavier_tanh = SimpleNN(input_dim, hidden_dim, output_dim, activation='tanh', xavier_init=True)

# Train every model on the same data with the same schedule
print("\nTraining model without Xavier initialization (Sigmoid)...")
train_losses_no_xavier_sigmoid, test_losses_no_xavier_sigmoid = train_model(
    model_no_xavier_sigmoid, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

print("\nTraining model with Xavier initialization (Sigmoid)...")
train_losses_xavier_sigmoid, test_losses_xavier_sigmoid = train_model(
    model_xavier_sigmoid, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

print("\nTraining model without Xavier initialization (Tanh)...")
train_losses_no_xavier_tanh, test_losses_no_xavier_tanh = train_model(
    model_no_xavier_tanh, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

print("\nTraining model with Xavier initialization (Tanh)...")
train_losses_xavier_tanh, test_losses_xavier_tanh = train_model(
    model_xavier_tanh, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

# Side-by-side comparison of the test-loss curves, one panel per activation
epochs = range(1, num_epochs+1)
plt.figure(figsize=(12, 6))

for panel, panel_title, baseline_curve, xavier_curve in (
    (1, 'Sigmoid Activation - Xavier vs No Xavier',
     test_losses_no_xavier_sigmoid, test_losses_xavier_sigmoid),
    (2, 'Tanh Activation - Xavier vs No Xavier',
     test_losses_no_xavier_tanh, test_losses_xavier_tanh),
):
    plt.subplot(1, 2, panel)
    plt.plot(epochs, baseline_curve, label='No Xavier', color='blue')
    plt.plot(epochs, xavier_curve, label='Xavier', color='orange')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

plt.tight_layout()
plt.show()


## **Inicialización He**

Vamos a ver el efecto de la inicialización He / Kaiming sobre la función ReLU.

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Two-moons toy problem (binary classification), split 80/20.
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics estimated on the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Float32 features and int64 class labels as PyTorch tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Define a simple feedforward neural network with He initialization and ReLU activation
class SimpleNN(nn.Module):
    """One-hidden-layer ReLU network with optional He (Kaiming) initialisation.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of hidden units.
        output_dim: Number of output logits.
        he_init: When true, the weights of both linear layers are
            re-initialised with Kaiming uniform initialisation for ReLU; the
            biases keep ``nn.Linear``'s default initialisation.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, he_init=False):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

        if not he_init:
            return
        # Re-draw the weights layer by layer, hidden layer first
        for layer in (self.fc1, self.fc2):
            nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')

    def forward(self, x):
        """Return the output logits for ``x`` (no softmax is applied)."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)

# Train the model without using optimizers (manual gradient descent)
def train_model(model, X_train, y_train, X_test, y_test, num_epochs=100, learning_rate=0.01):
    """Fit ``model`` with hand-written full-batch gradient descent.

    No optimizer object is used: after each backward pass every parameter is
    updated in place as ``param -= learning_rate * param.grad``. The loss is
    cross-entropy, and a progress line is printed every 10 epochs.

    Returns:
        A ``(train_losses, test_losses)`` pair of lists with one float per
        epoch. The training loss is the value measured before that epoch's
        update; the test loss is measured after it.
    """
    loss_fn = nn.CrossEntropyLoss()

    train_losses = []
    test_losses = []

    for step in range(1, num_epochs + 1):
        model.train()

        # Forward pass on the full training set
        batch_loss = loss_fn(model(X_train), y_train)

        # Clear stale gradients, then backpropagate
        model.zero_grad()
        batch_loss.backward()

        # Gradient-descent step; no_grad keeps the update out of the graph
        with torch.no_grad():
            for weight in model.parameters():
                weight.sub_(learning_rate * weight.grad)

        train_value = batch_loss.item()
        train_losses.append(train_value)

        # Held-out loss of the freshly updated model
        model.eval()
        with torch.no_grad():
            test_value = loss_fn(model(X_test), y_test).item()
        test_losses.append(test_value)

        if step % 10 == 0:
            print(f'Epoch [{step}/{num_epochs}], Loss: {train_value:.4f}, Test Loss: {test_value:.4f}')

    return train_losses, test_losses

# Hyperparameters: 2 input features, 10 hidden units, 2 output classes
input_dim, hidden_dim, output_dim = 2, 10, 2
num_epochs = 100
learning_rate = 0.01

# Build both networks up front (default init first, then He) so the order in
# which random initial weights are drawn stays fixed.
model_no_he_relu = SimpleNN(input_dim, hidden_dim, output_dim, he_init=False)
model_he_relu = SimpleNN(input_dim, hidden_dim, output_dim, he_init=True)

# Train both models on the same data with the same schedule
print("\nTraining model without He initialization (ReLU)...")
train_losses_no_he_relu, test_losses_no_he_relu = train_model(
    model_no_he_relu, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

print("\nTraining model with He initialization (ReLU)...")
train_losses_he_relu, test_losses_he_relu = train_model(
    model_he_relu, X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor,
    num_epochs=num_epochs, learning_rate=learning_rate)

# Compare the test-loss curves of the two initialisations in a single figure
epochs = range(1, num_epochs+1)
plt.figure(figsize=(12, 6))

for curve, curve_label, curve_color in (
    (test_losses_no_he_relu, 'No He', 'blue'),
    (test_losses_he_relu, 'He', 'orange'),
):
    plt.plot(epochs, curve, label=curve_label, color=curve_color)

plt.title('ReLU Activation - He vs No He Initialization')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()


## **Dataset de cáncer de mama**

Vamos a usar los conceptos introducidos en esta clase en un dataset real. Es un caso de clasificación binaria en casos benignos y malignos.

ID: Número de identificación del paciente.

- Diagnóstico (Diagnosis): Variable objetivo que indica si la muestra es benigna (B) o maligna (M).

10 características de valor real (real-valued features) que describen diferentes propiedades de los núcleos celulares en las imágenes FNA. Estas características incluyen:
- Radio (radius)
- Textura (texture)
- Perímetro (perimeter)
- Área (area)
- Suavidad (smoothness)
- Compacidad (compactness)
- Concavidad (concavity)
- Puntos cóncavos (concave points)
- Simetría (symmetry)
- Dimensión fractal (fractal dimension)


The mean, standard error, and “worst” or largest (mean of the three worst/largest values) of these features were computed for each image, resulting in 30 features.

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Breast Cancer dataset bundled with scikit-learn: keep the loaded object
# around and pull out its feature matrix and target vector.
data = load_breast_cancer()
X, y = data.data, data.target

In [None]:
# Show the object returned by load_breast_cancer() as this cell's output.
data

In [None]:
# Show the feature matrix X as a table.
pd.DataFrame(X)

In [None]:
# Show the target vector y as a single-column table.
pd.DataFrame(y)

In [None]:

# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics estimated on the training split only
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Float32 features and int64 class labels as PyTorch tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Shuffled mini-batches over the training split
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Define a neural network with two hidden layers
class NeuralNetwork(nn.Module):
    """Two-hidden-layer ReLU network whose weights use Kaiming normal init.

    Layout: ``input_size -> hidden_size1 -> hidden_size2 -> output_size``.
    All three weight matrices (including the output layer's) are
    re-initialised with ``kaiming_normal_``; biases keep ``nn.Linear``'s
    default initialisation. ``forward`` returns the raw output of the last
    layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

        # Kaiming initialization, applied in layer order
        for layer in (self.fc1, self.fc2, self.fc3):
            nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        """Apply the two hidden ReLU layers followed by the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.fc3(x)

# Hyperparameters
input_size = X_train.shape[1]
hidden_size1, hidden_size2 = 64, 32
output_size = 2  # Binary classification
learning_rate, epochs = 0.01, 100
l1_lambda, clip_value = 0.001, 1.0  # L1 penalty weight and gradient-norm cap

# Loss function and model; the SGD update itself is written by hand later on
criterion = nn.CrossEntropyLoss()
model = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)

# Per-epoch accuracy histories, used for plotting
train_accuracies, test_accuracies = [], []

# Function to calculate accuracy
def calculate_accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    ``outputs`` holds one row of class scores per sample (the prediction is
    the arg-max along dimension 1) and ``labels`` the matching class indices.
    """
    predicted = outputs.argmax(dim=1)
    n_correct = predicted.eq(labels).sum().item()
    return n_correct / labels.size(0)

# Training loop: mini-batch SGD written by hand, with an L1 penalty on all
# parameters and gradient-norm clipping.
for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct_train = 0
    total_train = 0

    for X_batch, y_batch in train_loader:
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # L1 regularization: add the sum of absolute values of every
        # parameter (weights and biases), scaled by l1_lambda
        l1_norm = sum(torch.sum(torch.abs(param)) for param in model.parameters())
        loss += l1_lambda * l1_norm

        # Backward pass
        loss.backward()

        # Gradient clipping: rescale so the global gradient norm is at most clip_value
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

        # Manually update weights (plain SGD step outside of autograd tracking)
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        # Zero the gradients after updating so the next batch starts clean
        model.zero_grad()

        # Accumulate the epoch statistics for the training data
        total_loss += loss.item()
        correct_train += (torch.argmax(outputs, dim=1) == y_batch).sum().item()
        total_train += y_batch.size(0)

    # Calculate and record training accuracy (per-sample, over the whole epoch)
    train_accuracy = correct_train / total_train
    train_accuracies.append(train_accuracy)

    # Evaluate on test data
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor).item()

        # Calculate test accuracy
        test_accuracy = calculate_accuracy(test_outputs, y_test_tensor)
        test_accuracies.append(test_accuracy)

    # Report every 10th completed epoch using a 1-based counter, so the last
    # line shown is [epochs/epochs] instead of stopping at [epochs-10/epochs]
    # after starting from a "0th" epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

# Plot the training and testing accuracy curves
plt.plot(train_accuracies, label='Training Accuracy')
plt.plot(test_accuracies, label='Testing Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Testing Accuracy Curves')
plt.legend()
plt.show()


## **Wine Dataset: clasificación multi-clase**

**Data Set Characteristics**:

**Number of Instances**: 178

**Number of Attributes**: 13 numeric, predictive attributes and the class

**Attribute Information**:
1. Alcohol

2. Malic acid

3. Ash

4. Alcalinity of ash

5. Magnesium

6. Total phenols

7. Flavanoids

8. Nonflavanoid phenols

9. Proanthocyanins

10. Color intensity

11. Hue

12. OD280/OD315 of diluted wines

13. Proline

**class**:
1. class_0

2. class_1

3. class_2

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Wine dataset bundled with scikit-learn: keep the loaded object around and
# pull out its feature matrix and target vector.
data = load_wine()
X, y = data.data, data.target


In [None]:
# Show the object returned by load_wine() as this cell's output.
data

In [None]:
# Show the feature matrix X as a table.
# NOTE(review): `pd` is not imported in this section's import cell; this relies
# on the earlier `import pandas as pd` having been executed.
pd.DataFrame(X)

In [None]:
# Show the target vector y as a single-column table.
# NOTE(review): `pd` is not imported in this section's import cell; this relies
# on the earlier `import pandas as pd` having been executed.
pd.DataFrame(y)

In [None]:

# Hold out half of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Standardize using statistics estimated on the training split only
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Float32 features and int64 class labels as PyTorch tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Shuffled mini-batches over the training split
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Define a neural network with two hidden layers
class NeuralNetwork(nn.Module):
    """Two-hidden-layer ReLU classifier whose weights use Kaiming normal init.

    Layout: ``input_size -> hidden_size1 -> hidden_size2 -> output_size``.
    All three weight matrices (including the output layer's) are
    re-initialised with ``kaiming_normal_``; biases keep ``nn.Linear``'s
    default initialisation. ``forward`` returns the raw output of the last
    layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

        # Kaiming initialization, applied in layer order
        for layer in (self.fc1, self.fc2, self.fc3):
            nn.init.kaiming_normal_(layer.weight)

    def forward(self, x):
        """Apply the two hidden ReLU layers followed by the linear output layer."""
        first_hidden = F.relu(self.fc1(x))
        second_hidden = F.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Hyperparameters
input_size = X_train.shape[1]
hidden_size1 = 64
hidden_size2 = 32
output_size = len(data.target_names)  # 3 classes for Wine dataset
learning_rate = 0.01
epochs = 100
clip_value = 1.0  # Upper bound on the global gradient norm

# Initialize the model and loss function; the SGD update is written by hand
model = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)
criterion = nn.CrossEntropyLoss()

# For plotting
train_losses = []
test_losses = []

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    seen_samples = 0

    for X_batch, y_batch in train_loader:
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass
        loss.backward()

        # Gradient clipping: rescale so the global gradient norm is at most clip_value
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

        # Manually update weights (plain SGD step outside of autograd tracking)
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        # Zero the gradients after updating so the next batch starts clean
        model.zero_grad()

        # criterion returns the batch mean, so weight it by the batch size:
        # the last batch is usually smaller and would otherwise count as much
        # as a full one in the epoch average.
        total_loss += loss.item() * y_batch.size(0)
        seen_samples += y_batch.size(0)

    # Record the per-sample training loss, directly comparable to the
    # per-sample test loss computed below
    avg_train_loss = total_loss / seen_samples
    train_losses.append(avg_train_loss)

    # Evaluate on test data
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor).item()
        test_losses.append(test_loss)

    # Report every 10th completed epoch using a 1-based counter, so the last
    # line shown is [epochs/epochs] instead of stopping at [epochs-10/epochs]
    # after starting from a "0th" epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Training Loss: {avg_train_loss:.4f}, Test Loss: {test_loss:.4f}')

# Plot the training and testing loss curves
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Testing Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Testing Loss Curves')
plt.legend()
plt.show()

## **Diabetes Dataset : regresión**

Ten baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline.

**Dataset characteristics:**

**Number of Instances**: 442

**Number of Attributes**: First 10 columns are numeric predictive values

**Target**: Column 11 is a quantitative measure of disease progression one year after baseline

**Attribute Information**:

1. Age: age in years

2. Sex

3. BMI: body mass index

4. BP: average blood pressure

5. S1: tc, total serum cholesterol

6. S2: ldl, low-density lipoproteins

7. S3: hdl, high-density lipoproteins

8. S4: tch, total cholesterol / HDL

9. S5: ltg, possibly log of serum triglycerides level

10. S6: glu, blood sugar level

Note: Each of these 10 feature variables has been mean centered and scaled by the standard deviation times the square root of n_samples (i.e. the sum of squares of each column totals 1).

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Diabetes regression dataset bundled with scikit-learn: keep the loaded
# object around and pull out its feature matrix and target vector.
data = load_diabetes()
X, y = data.data, data.target


In [None]:
# Show the feature matrix X as a table.
# NOTE(review): `pd` is not imported in this section's import cell; this relies
# on the earlier `import pandas as pd` having been executed.
pd.DataFrame(X)

In [None]:
# Show the target vector y as a single-column table.
# NOTE(review): `pd` is not imported in this section's import cell; this relies
# on the earlier `import pandas as pd` having been executed.
pd.DataFrame(y)

In [None]:

# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize using statistics estimated on the training split only
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Float32 tensors; targets become column vectors so they line up with the
# network's single-output predictions
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Shuffled mini-batches over the training split
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Define a neural network with two hidden layers
class NeuralNetwork(nn.Module):
    """Two-hidden-layer ReLU regressor whose weights use Kaiming normal init.

    Layout: ``input_size -> hidden_size1 -> hidden_size2 -> output_size``.
    All three weight matrices (including the output layer's) are
    re-initialised with ``kaiming_normal_``; biases keep ``nn.Linear``'s
    default initialisation. ``forward`` returns the raw output of the last
    layer.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, output_size)

        # Kaiming initialization, applied in layer order
        for dense in (self.fc1, self.fc2, self.fc3):
            nn.init.kaiming_normal_(dense.weight)

    def forward(self, x):
        """Apply the two hidden ReLU layers followed by the linear output layer."""
        return self.fc3(F.relu(self.fc2(F.relu(self.fc1(x)))))

# Hyperparameters
input_size = X_train.shape[1]
hidden_size1 = 64
hidden_size2 = 32
output_size = 1  # Regression output
learning_rate = 0.1
epochs = 100
clip_value = 0.5  # Upper bound on the global gradient norm

# Initialize the model and loss function; the SGD update is written by hand
model = NeuralNetwork(input_size, hidden_size1, hidden_size2, output_size)
criterion = nn.MSELoss()

# For plotting
train_losses = []
test_losses = []

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    seen_samples = 0

    for X_batch, y_batch in train_loader:
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass
        loss.backward()

        # Gradient clipping: rescale so the global gradient norm is at most clip_value
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

        # Manually update weights (plain SGD step outside of autograd tracking)
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        # Zero the gradients after updating so the next batch starts clean
        model.zero_grad()

        # criterion returns the batch mean, so weight it by the batch size.
        # The final batch can be much smaller than batch_size (down to a
        # single sample); averaging batch means would give that batch the
        # same weight as a full one and distort the reported epoch MSE.
        total_loss += loss.item() * y_batch.size(0)
        seen_samples += y_batch.size(0)

    # Record the per-sample training loss, directly comparable to the
    # per-sample test loss computed below
    avg_train_loss = total_loss / seen_samples
    train_losses.append(avg_train_loss)

    # Evaluate on test data
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, y_test_tensor).item()
        test_losses.append(test_loss)

    # Report every 10th completed epoch using a 1-based counter, so the last
    # line shown is [epochs/epochs] instead of stopping at [epochs-10/epochs]
    # after starting from a "0th" epoch.
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Training Loss: {avg_train_loss:.4f}, Test Loss: {test_loss:.4f}')

# Plot the training and testing loss curves
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Testing Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.title('Training and Testing Loss Curves')
plt.legend()
plt.show()
