In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


import numpy as np
import matplotlib.pyplot as plt
import copy
from ast import Param

from utils.prune import prune_weights
from utils.count_improvement import improvements

In [21]:
# Verificar si la GPU está disponible y establecer el dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [22]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        



Definimos una red y le copiamos los pesos en una lista

In [23]:
net = Net().to(device)
varianzas_net = Net().to(device)

varianzas = []
for param in varianzas_net.parameters():
    varianzas.extend(param.data.clone().flatten().tolist())
print(varianzas_net.state_dict())


OrderedDict({'fc1.weight': tensor([[ 0.0184, -0.0001, -0.0021,  ..., -0.0217, -0.0259,  0.0186],
        [ 0.0299,  0.0263,  0.0074,  ..., -0.0094, -0.0107,  0.0339],
        [ 0.0220,  0.0030, -0.0151,  ...,  0.0268, -0.0103, -0.0105],
        ...,
        [ 0.0310, -0.0039,  0.0035,  ..., -0.0110,  0.0262,  0.0322],
        [-0.0116,  0.0044,  0.0302,  ...,  0.0343, -0.0240,  0.0010],
        [ 0.0102,  0.0334, -0.0047,  ..., -0.0038,  0.0116,  0.0007]],
       device='cuda:0'), 'fc1.bias': tensor([-1.5143e-02,  3.3856e-02,  1.6547e-02,  1.5167e-02, -1.3145e-02,
         7.6727e-03, -2.9181e-02,  1.7962e-02, -1.3633e-02, -1.6690e-02,
         8.8613e-03,  9.5347e-04, -3.3238e-02,  1.1129e-02, -2.2256e-02,
         3.0368e-02,  3.0171e-02, -2.0882e-02, -3.0912e-03, -7.5424e-03,
         2.0892e-02,  3.2407e-03,  1.5855e-02, -2.3921e-03,  1.5079e-02,
        -3.0563e-02, -8.3430e-03,  3.2424e-02, -2.3980e-02,  3.3595e-02,
         1.9144e-02,  5.4057e-04,  1.4853e-03,  2.0683e-02,  5.3

In [24]:
print(len(varianzas))
print(varianzas[0])

235146
0.01838049292564392


In [25]:
# Calculate the number of weights
num_weights = sum(p.numel() for p in varianzas_net.parameters() if p.requires_grad)
print(f"Number of weights: {num_weights}")

Number of weights: 235146


Cargamos los datos

In [26]:
# Definimos el transform para los datos de MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Cargamos el dataset de MNIST
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

# Definimos los DataLoaders para los conjuntos de entrenamiento y prueba
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [27]:
# Definimos la función de pérdida para calcular el error
criterion = nn.CrossEntropyLoss()

In [28]:
# train loop
train_loss = []
test_accuracies = []
epochs = 10
accuracy_threshold = 0.6

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}")
    if epoch + 1 == 1:
        pruned_net = prune_weights(net)

        running_loss = 0.0
        # Pasamos todas las imagenes de train por la red net
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs_net = pruned_net(images)
            loss = criterion(outputs_net, labels)

            running_loss += loss.item()
        
        train_loss.append(running_loss / len(train_loader))
        print(f"Train loss: {running_loss / len(train_loader)}")

        # Evaluamos el modelo en el conjunto de test
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)

                outputs_net = pruned_net(images)
                _, predicted = torch.max(outputs_net.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracies.append(correct / total)
        print(f"Test accuracy: {correct / total}")

        if correct / total > accuracy_threshold:
            break
    
    else:
        # Crear una copia de la red neuronal original
        varied_net = copy.deepcopy(net)

        # Actualizar los pesos de la red neuronal copiada
        varied_net.weights = [peso + varianza for peso, varianza in zip(varied_net.weights, varianzas)]

        pruned_net = prune_weights(varied_net)

        running_loss = 0.0
        # Pasamos todas las imagenes de train por la red net
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs_net = pruned_net(images)
            loss = criterion(outputs_net, labels)

            running_loss += loss.item()
        
        train_loss.append(running_loss / len(train_loader))
        print(f"Train loss: {running_loss / len(train_loader)}")

        # Evaluamos el modelo en el conjunto de test
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)

                outputs_net = pruned_net(images)
                _, predicted = torch.max(outputs_net.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracies.append(correct / total)
        print(f"Test accuracy: {correct / total}")

        if correct / total > accuracy_threshold:
            break

        # actualizmos el vector de varianzas
        if improvements(train_loss) == 0:## +mejoras que peoras
            varianzas = [varianza * (1/0.82) for varianza in varianzas]
        
        elif improvements(train_loss) == 1: ## -mejoras que peoras
            varianzas = [varianza * 0.82 for varianza in varianzas]

        



Epoch 1
Train loss: 2.3125341637556485
Test accuracy: 0.0981
Epoch 2


AttributeError: 'Net' object has no attribute 'weights'