# **Diplomatura en Ciencia de Datos - UNNE - 2024**
### Módulo 4: Aprendizaje Automático
### Clase 5: Introducción a las Redes Neuronales Artificiales

Definamos una red sencilla de una sola capa oculta.

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Build a synthetic two-class dataset: 1000 samples with 20 features, 15 of them informative
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_classes=2,
    random_state=42,
)

In [None]:
# Dimensions of the feature matrix: (number of samples, number of features)
X.shape

In [None]:
# Shape of a single sample (the first row of X)
X[0].shape

In [None]:
# Dimensions of the label array returned by make_classification
y.shape

In [None]:
# Display the raw label values
y

In [None]:
# Hold out 30% of the samples as a test split (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Scaling statistics come from the training split only and are reused on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Features become float32 tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Labels become float32 column vectors (an extra dimension is added at position 1)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(dim=1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(dim=1)

# Mini-batches of 32 training samples, reshuffled on every pass over the loader
train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Define the model with 1 hidden layer
class SimpleNN(nn.Module):
    """Binary classifier with a single hidden layer.

    Architecture: Linear(20 -> 64) -> ReLU -> Linear(64 -> 1) -> Sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter order is unchanged
        self.fc1 = nn.Linear(in_features=20, out_features=64)  # hidden layer
        self.fc2 = nn.Linear(in_features=64, out_features=1)   # output layer

        # Non-linearities
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated output per input row."""
        hidden = self.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

# Instantiate the model and loss function
model = SimpleNN()
criterion = nn.BCELoss()  # Binary cross-entropy, applied to the model's sigmoid outputs

# Hyperparameters
learning_rate = 0.01
epochs = 100

# Per-epoch loss history, consumed by the plot after training
train_losses = []
test_losses = []

# Training loop: mini-batch gradient descent with hand-written parameter updates
for epoch in range(epochs):
    total_train_loss = 0

    # Training phase
    model.train()
    for data_batch, target_batch in train_loader:
        output = model(data_batch)
        loss = criterion(output, target_batch)

        # Clear gradients left over from the previous step, then backpropagate
        model.zero_grad()
        loss.backward()

        # Plain gradient-descent step; no_grad keeps the update itself out of autograd
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        total_train_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_train_loss = total_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Testing phase: a single forward pass over the whole test split, no gradients needed
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
        test_loss = criterion(test_output, y_test_tensor)
        avg_test_loss = test_loss.item()
        test_losses.append(avg_test_loss)

    print(f"Epoch {epoch + 1}, Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}")

# Plotting the training and testing losses
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Testing Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Testing Loss over Epochs')
plt.show()

# Test the final model
model.eval()  # set explicitly so this evaluation does not rely on the mode the loop ended in
with torch.no_grad():
    test_output = model(X_test_tensor)
    test_output = test_output.round()  # Round the sigmoid output to 0 or 1 to get a class label
    accuracy = (test_output == y_test_tensor).float().mean()
    print(f"Final Test Accuracy: {accuracy.item() * 100:.2f}%")


Definamos ahora una red con dos capas ocultas.

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Synthetic two-class problem: 1000 samples, 20 features (15 informative), fixed seed
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    n_informative=15,
    random_state=42,
)

# 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize using statistics fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Features as float32 tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
# Labels as float32 column vectors (extra dimension added at position 1)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.float32).unsqueeze(1) for labels in (y_train, y_test)
)

# Shuffled mini-batches of 32 training samples
train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = data.DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)

# Define the model with 2 hidden layers and different activation functions
class CustomNN(nn.Module):
    """Binary classifier with two hidden layers using different activations.

    Architecture:
        Linear(20 -> 64) -> ReLU -> Linear(64 -> 32) -> Tanh -> Linear(32 -> 1) -> Sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter order is unchanged
        self.fc1 = nn.Linear(in_features=20, out_features=64)  # first hidden layer
        self.fc2 = nn.Linear(in_features=64, out_features=32)  # second hidden layer
        self.fc3 = nn.Linear(in_features=32, out_features=1)   # output layer

        # One activation module per stage of the forward pass
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated output per input row."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.tanh(self.fc2(first_hidden))
        logits = self.fc3(second_hidden)
        return self.sigmoid(logits)

# Instantiate the model and loss function
model = CustomNN()
criterion = nn.BCELoss()  # Binary cross-entropy, applied to the model's sigmoid outputs

# Hyperparameters
learning_rate = 0.01
epochs = 100

# Per-epoch loss history, consumed by the plot after training
train_losses = []
test_losses = []

# Training loop: mini-batch gradient descent with hand-written parameter updates
for epoch in range(epochs):
    total_train_loss = 0

    # Training phase
    model.train()
    for data_batch, target_batch in train_loader:
        output = model(data_batch)
        loss = criterion(output, target_batch)

        # Clear gradients left over from the previous step, then backpropagate
        model.zero_grad()
        loss.backward()

        # Plain gradient-descent step; no_grad keeps the update itself out of autograd
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        total_train_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_train_loss = total_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Testing phase: a single forward pass over the whole test split, no gradients needed
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
        test_loss = criterion(test_output, y_test_tensor)
        avg_test_loss = test_loss.item()
        test_losses.append(avg_test_loss)

    print(f"Epoch {epoch + 1}, Train Loss: {avg_train_loss:.4f}, Test Loss: {avg_test_loss:.4f}")

# Plotting the training and testing losses
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Testing Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Testing Loss over Epochs')
plt.show()

# Test the final model
model.eval()  # set explicitly so this evaluation does not rely on the mode the loop ended in
with torch.no_grad():
    test_output = model(X_test_tensor)
    test_output = test_output.round()  # Round the sigmoid output to 0 or 1 to get a class label
    accuracy = (test_output == y_test_tensor).float().mean()
    print(f"Final Test Accuracy: {accuracy.item() * 100:.2f}%")

# **Regularización L1 para un caso de regresión**

## **California Housing dataset**  

Data Set Characteristics:  

**Number of Instances:** 20640

**Number of Attributes:** 8 numeric, predictive attributes and the target

**Attribute Information:**
- MedInc median income in block group
- HouseAge median house age in block group
- AveRooms average number of rooms per household
- AveBedrms average number of bedrooms per household
- Population block group population
- AveOccup average number of household members
- Latitude block group latitude
- Longitude block group longitude

**Missing Attribute Values:** None

The target variable is the median house value for California districts, expressed in hundreds of thousands of dollars ($100,000). This dataset was derived from the 1990 U.S. census, using one row per census block group. A block group is the smallest geographical unit for which the U.S. Census Bureau publishes sample data (a block group typically has a population of 600 to 3,000 people).


In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Fetch the California Housing data and set 30% of it aside as a test split
california_housing = fetch_california_housing()
X_train, X_test, y_train, y_test = train_test_split(
    california_housing.data,
    california_housing.target,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Display the object returned by fetch_california_housing()
california_housing

In [None]:
# Feature values of the first sample
california_housing.data[0]

In [None]:
# Target value of the first sample
california_housing.target[0]

In [None]:

# Scale features with statistics fitted on the training split, reused on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training split: float32 features and float32 targets as a column vector
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(dim=1)

# Test split, converted the same way
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(dim=1)

# Shuffled mini-batches of 32 training samples
train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)

# Define the model
class HousingNN(nn.Module):
    """Regression network with two ReLU hidden layers and a linear output.

    Architecture: Linear(8 -> 64) -> ReLU -> Linear(64 -> 32) -> ReLU -> Linear(32 -> 1).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter order is unchanged
        self.fc1 = nn.Linear(in_features=8, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return one unbounded prediction per input row (no output activation)."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Instantiate two independent models (one trained with an L1 penalty, one without) and the loss
model_no_reg = HousingNN()
model_l1_reg = HousingNN()
criterion = nn.MSELoss()

# Hyperparameters
learning_rate = 0.01
l1_lambda = 0.001  # weight of the L1 penalty added to the MSE loss
epochs = 100

# To track losses for plotting
train_losses_no_reg = []
train_losses_l1_reg = []

# Training loop for both models: each mini-batch is fed to both, and each is updated separately
for epoch in range(epochs):
    total_train_loss_no_reg = 0
    total_train_loss_l1_reg = 0

    for data_batch, target_batch in train_loader:
        # ---- Model without regularization ----
        output_no_reg = model_no_reg(data_batch)
        loss_no_reg = criterion(output_no_reg, target_batch)

        # Clear old gradients, then backpropagate
        model_no_reg.zero_grad()
        loss_no_reg.backward()

        # Manual gradient-descent step; no_grad keeps the update out of autograd
        with torch.no_grad():
            for param in model_no_reg.parameters():
                param -= learning_rate * param.grad

        total_train_loss_no_reg += loss_no_reg.item()

        # ---- Model with L1 regularization ----
        output_l1_reg = model_l1_reg(data_batch)
        loss_l1_reg = criterion(output_l1_reg, target_batch)

        # L1 penalty: sum of absolute values over every parameter (weights and biases)
        l1_penalty = 0
        for param in model_l1_reg.parameters():
            l1_penalty += torch.sum(torch.abs(param))
        loss_l1_reg += l1_lambda * l1_penalty

        # Clear old gradients, then backpropagate through MSE + penalty
        model_l1_reg.zero_grad()
        loss_l1_reg.backward()

        # Manual gradient-descent step
        with torch.no_grad():
            for param in model_l1_reg.parameters():
                param -= learning_rate * param.grad

        # NOTE: this value includes the penalty term, so it is MSE + l1_lambda * penalty
        total_train_loss_l1_reg += loss_l1_reg.item()

    # Record the mean per-batch loss for each epoch
    avg_train_loss_no_reg = total_train_loss_no_reg / len(train_loader)
    avg_train_loss_l1_reg = total_train_loss_l1_reg / len(train_loader)
    train_losses_no_reg.append(avg_train_loss_no_reg)
    train_losses_l1_reg.append(avg_train_loss_l1_reg)

    # Report the same per-batch means that are plotted (not the sum over all batches)
    print(f"Epoch {epoch + 1}, No Reg Loss: {avg_train_loss_no_reg:.4f}, L1 Reg Loss: {avg_train_loss_l1_reg:.4f}")

# Plotting the training losses
plt.plot(train_losses_no_reg, label='No Regularization')
plt.plot(train_losses_l1_reg, label='L1 Regularization')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training Loss with and without L1 Regularization')
plt.show()

# **Dataset Iris con regularización L2**

## **The Iris Dataset**

This data set consists of the petal and sepal lengths and widths of 3 different types of irises (Setosa, Versicolour, and Virginica), stored in a 150x4 numpy.ndarray.

The rows being the samples and the columns being: Sepal Length, Sepal Width, Petal Length and Petal Width.

In [None]:
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load Iris and hold out 85% of the samples for testing (only 15% is used for training)
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.85,
    random_state=42,
)

In [None]:
# Display the object returned by load_iris()
iris

In [None]:
# Feature values of the first sample
iris.data[0]

In [None]:
# Class label of the first sample
iris.target[0]

In [None]:

# Scale features with statistics fitted on the training split, reused on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Features as float32 tensors
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
# Class labels as int64 (long) tensors, kept one-dimensional
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Shuffled mini-batches of 16 training samples
train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = data.DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

# Define the model
class IrisNN(nn.Module):
    """Three-class classifier with one ReLU hidden layer.

    Architecture: Linear(4 -> 16) -> ReLU -> Linear(16 -> 3).
    The output is raw per-class scores; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter order is unchanged
        self.fc1 = nn.Linear(in_features=4, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=3)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return three unnormalized class scores per input row."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate model and loss function
model = IrisNN()
criterion = nn.CrossEntropyLoss()  # takes the model's raw class scores and integer class labels

# Hyperparameters
learning_rate = 0.01
weight_decay = 0.001  # weight of the L2 penalty added to the loss
epochs = 1000

# Per-epoch loss history (cross-entropy + L2 penalty), consumed by the plot after training
train_errors = []
test_errors = []

# Training loop
for epoch in range(epochs):
    total_train_loss = 0
    # The batch variables are deliberately not named `data`: that name is the imported
    # torch.utils.data module, and rebinding it breaks a re-run of the data-loading cell.
    for data_batch, target_batch in train_loader:
        output = model(data_batch)
        loss = criterion(output, target_batch)

        # L2 regularization: sum of squared parameter values (not the unsquared norm)
        l2_reg = sum(param.pow(2).sum() for param in model.parameters())
        loss = loss + weight_decay * l2_reg

        # Zero the gradients
        model.zero_grad()

        # Backpropagation
        loss.backward()

        # Manual gradient-descent step; no_grad keeps the update out of autograd
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        total_train_loss += loss.item()

    # Test the model, using the same regularized objective as in training
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
        test_loss = criterion(test_output, y_test_tensor)
        test_loss += weight_decay * sum(param.pow(2).sum() for param in model.parameters())
        test_errors.append(test_loss.item())

    # Mean per-batch training loss, directly comparable with the test loss
    avg_train_loss = total_train_loss / len(train_loader)
    train_errors.append(avg_train_loss)
    model.train()

    print(f"Epoch {epoch + 1}, Train Loss: {avg_train_loss:.4f}, Test Loss: {test_loss.item():.4f}")

# Plotting the errors
plt.plot(train_errors, label='Training Error')
plt.plot(test_errors, label='Testing Error')
plt.xlabel('Epochs')
plt.ylabel('Error')
plt.legend()
plt.show()


## Dataset Calidad de Vino

**Input variables (based on physicochemical tests):**

1. fixed acidity
2. volatile acidity
3. citric acid
4. residual sugar
5. chlorides
6. free sulfur dioxide
7. total sulfur dioxide
8. density
9. pH
10. sulphates
11. alcohol

**Output variable (based on sensory data):** 

12. quality (score between 0 and 10)

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Read the red-wine quality table from the UCI repository (semicolon-separated CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine = pd.read_csv(url, delimiter=";")

# Labels are the "quality" column; features are every other column
y = wine["quality"].values
X = wine.drop("quality", axis=1).values

# Even 50/50 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Display the DataFrame read from the CSV
wine

In [None]:
# Dimensions of the feature matrix (every column except "quality")
X.shape

In [None]:
# Dimensions of the label array (the "quality" column)
y.shape

In [None]:

# Scale features with statistics fitted on the training split, reused on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Training split: float32 features, int64 (long) quality scores used directly as class labels
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

# Test split, converted the same way
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Shuffled mini-batches of 16 training samples
train_dataset = data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

# Define the model
class WineQualityNN(nn.Module):
    """Multi-class classifier for wine quality with two ReLU hidden layers.

    Architecture:
        Linear(num_features -> 64) -> ReLU -> Linear(64 -> 32) -> ReLU
        -> Linear(32 -> num_classes).
    The output is raw per-class scores; no softmax is applied here.

    Args:
        num_features: Number of input features per sample (default 11).
        num_classes: Number of output scores per sample (default 10). With
            integer class labels, the defaults cover labels 0-9 only; a
            quality score of 10 needs ``num_classes=11``.
    """

    def __init__(self, num_features=11, num_classes=10):
        super().__init__()
        # Layers are created in the same order as before so parameter order is unchanged
        self.fc1 = nn.Linear(num_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``num_classes`` unnormalized class scores per input row."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

# Instantiate model and loss function
model = WineQualityNN()
criterion = nn.CrossEntropyLoss()  # takes the model's raw class scores and integer class labels

# Hyperparameters
learning_rate = 0.001
weight_decay = 0.001  # weight of the L2 penalty added to the loss
epochs = 1000

# Per-epoch loss history (cross-entropy + L2 penalty), consumed by the plot after training
train_errors = []
test_errors = []

# Training loop
for epoch in range(epochs):
    total_train_loss = 0
    # The batch variables are deliberately not named `data`: that name is the imported
    # torch.utils.data module, and rebinding it breaks a re-run of the data-loading cell.
    for data_batch, target_batch in train_loader:
        output = model(data_batch)
        loss = criterion(output, target_batch)

        # L2 regularization: sum of squared parameter values (not the unsquared norm)
        l2_reg = sum(param.pow(2).sum() for param in model.parameters())
        loss = loss + weight_decay * l2_reg

        # Zero the gradients
        model.zero_grad()

        # Backpropagation
        loss.backward()

        # Manual gradient-descent step; no_grad keeps the update out of autograd
        with torch.no_grad():
            for param in model.parameters():
                param -= learning_rate * param.grad

        total_train_loss += loss.item()

    # Test the model, using the same regularized objective as in training
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
        test_loss = criterion(test_output, y_test_tensor)
        test_loss += weight_decay * sum(param.pow(2).sum() for param in model.parameters())
        test_errors.append(test_loss.item())

    # Mean per-batch training loss, directly comparable with the test loss
    avg_train_loss = total_train_loss / len(train_loader)
    train_errors.append(avg_train_loss)
    model.train()

    print(f"Epoch {epoch + 1}, Train Loss: {avg_train_loss:.4f}, Test Loss: {test_loss.item():.4f}")

# Plotting the errors
plt.plot(train_errors, label='Training Error')
plt.plot(test_errors, label='Testing Error')
plt.xlabel('Epochs')
plt.ylabel('Error')
plt.legend()
plt.show()