*   **Data Preparation:**
    
    *   Use any small dataset of your choice from `torchvision`, such as CIFAR-10.
    *   Prepare data loaders using `torch.utils.data.DataLoader`.
*   **k-Fold Cross-Validation Implementation:**
    
    *   Create a function that implements k-fold cross-validation.
    *   Split the dataset into k folds, training on k-1 folds, and testing on the remaining fold.
    *   Compute the accuracy of the model for each fold.
*   **Network Definition:**
    
    *   Define a simple neural network with at least one hidden layer using `torch.nn.Module`.
*   **Training and Evaluation:**
    
    *   For each fold, train the model on the training data and evaluate on the validation data.
    *   Report the mean and standard deviation of the accuracy across all folds.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold
import numpy as np

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100.0%


Extracting ./data\cifar-10-python.tar.gz to ./data


In [5]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 32 * 32 * 3)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
# k-Fold Cross-Validation

k = 5
kf = KFold(n_splits=k)
accuracies = []

for train_idx, test_idx in kf.split(dataset):
    train_subset = Subset(dataset, train_idx)
    test_subset = Subset(dataset, test_idx)

    train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_subset, batch_size=64, shuffle=False)

    model = SimpleNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    for epoch in range(5):
        model.train()
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    accuracies.append(accuracy)
    print(f'Fold Accuracy: {accuracy}%')

print(f'Mean Accuracy: {np.mean(accuracies)}%')
print(f'Standard Deviation: {np.std(accuracies)}%')

Fold Accuracy: 50.29%
Fold Accuracy: 49.28%
Fold Accuracy: 51.17%
Fold Accuracy: 50.2%
Fold Accuracy: 50.12%
Mean Accuracy: 50.212%
Standard Deviation: 0.6001799730080972%


Explanation: A simple neural network with three linear layers and ReLU activations is defined.



### Exercise 2: Dropout Regularization

#### Goal

Apply dropout regularization to a neural network to improve its robustness.

#### Instructions

1.  **Data Preparation:**
    
    *   Use the same dataset as in Exercise 1.
2.  **Network Definition:**
    
    *   Define a neural network with multiple layers using `torch.nn.Module`.
    *   Add dropout layers after each hidden layer using `torch.nn.Dropout`.
3.  **Training and Evaluation:**
    
    *   Train the model on the training dataset, evaluating on a separate validation set.
    *   Experiment with different dropout rates (e.g., 0.2, 0.5, 0.7).
    *   Observe how dropout affects the model's performance, especially underfitting and overfitting.
4.  **Analysis:**
    
    *   Plot training and validation accuracy curves for different dropout rates.
    *   Discuss the results, noting any significant changes in accuracy or overfitting.

In [7]:
# Red Neuronal con Dropout
class DropoutCNN(nn.Module):
    def __init__(self, dropout_rate):
        super(DropoutCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(16 * 16 * 16, 120)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = x.view(-1, 16 * 16 * 16)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [8]:
def train_evaluate(dropout_rate):
    model = DropoutCNN(dropout_rate)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    train_loader = DataLoader(dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset, batch_size=64, shuffle=False)

    for epoch in range(5):
        model.train()
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

# Evaluacion con diferentes ratios dropout
dropout_rates = [0.2, 0.5, 0.7]
accuracies = [train_evaluate(rate) for rate in dropout_rates]

print("dropout_rates: ",dropout_rates)
print("accuracies: ",accuracies)

dropout_rates:  [0.2, 0.5, 0.7]
accuracies:  [53.12, 53.118, 52.682]


### Exercise 3: L1 Regularization

#### Goal

Implement L1 regularization to constrain model weights and reduce overfitting.

#### Instructions

1.  **Data Preparation:**
    
    *   Use the same dataset as in Exercise 1.
2.  **Network Definition:**
    
    *   Define a neural network with at least one hidden layer using `torch.nn.Module`.
3.  **L1 Regularization Implementation:**
    
    *   Add L1 regularization to the loss function by including the sum of the absolute values of model weights.
    *   Manually calculate the L1 loss and add it to the main loss function.
4.  **Training and Evaluation:**
    
    *   Train the model with and without L1 regularization.
    *   Compare the results in terms of overfitting and weight sparsity.
5.  **Analysis:**
    *   Plot weight histograms to visualize the effect of L1 regularization.
    *   Report accuracy and loss for both regularized and non-regularized models.

In [22]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.conv4 = nn.Conv2d(128, 256, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(256 * 2 * 2, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()


In [23]:
# Función de pérdida
criterion = nn.CrossEntropyLoss()

# Optimizador
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

for epoch in range(10):  # Cuanto mas pongamos, mas acuracy
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        # Regularización L1
        l1_lambda = 0.001
        l1_loss = 0
        for param in net.parameters():
            l1_loss += torch.norm(param, 1)
        loss += l1_lambda * l1_loss
        
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')

RuntimeError: Calculated padded input size per channel: (2 x 2). Kernel size: (3 x 3). Kernel size can't be greater than actual input size

In [20]:
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


Accuracy of the network on the 10000 test images: 9 %
