In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.init as init

import numpy as np
import matplotlib.pyplot as plt
import copy
from ast import Param
import json
import math

from utils.prune import apply_mask
from utils.count_improvement import improvements
from utils.normalize import normalize_weights

In [2]:
# Verificar si la GPU está disponible y establecer el dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

In [4]:
net = Net().to(device)
varianzas_net = Net().to(device)
individuo = Net().to(device)

Cargamos los datos

In [5]:
# Definimos el transform para los datos de MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Cargamos el dataset de MNIST
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

# Definimos los DataLoaders para los conjuntos de entrenamiento y prueba
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False, num_workers=4)

In [6]:
# Definimos la función de pérdida para calcular el error
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

Realizamos el "entrenamiento"

In [None]:
# train loop
train_loss = []
test_accuracies = []
n_individuo = 1
accuracy_threshold = 0.7
best_loss = 100000 # Inicializamos con un valor muy alto para que el primer valor sea mas bajo

#Guardo el mejor individuo y la red original que tendrá que ser reestablecida cad epoca
best_individo_state_dict = individuo.state_dict()
net_state_dict = net.state_dict()

#train loop
while True:
    net.load_state_dict(net_state_dict)
    if n_individuo == 1:
        epoch = 1
        print(f"Individuo {n_individuo}")
        while epoch < 10:
            print(f"Epoch {epoch}")
            net = apply_mask(net, individuo)
            #guardar el estado de la red en un txt en una carpeta
            torch.save(net.state_dict(), f"individuo{n_individuo}_epoch{epoch}.txt")
            net.train()
            running_loss = 0.0
            # Train for 1 epoch
            for i, data in enumerate(train_loader, 0):
                inputs, labels = data[0].to(device), data[1].to(device)
                optimizer.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            train_loss.append(running_loss / len(train_loader))
            print(f"Loss: {train_loss[-1]}")
            print(net.state_dict())
            epoch += 1
        net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for data in test_loader:
                images, labels = data[0].to(device), data[1].to(device)
                outputs = net(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = correct / total
        print(f"Accuracy: {accuracy}, loss mean: {np.mean(train_loss)}")
        train_loss.append(np.mean(train_loss))
        test_accuracies.append(accuracy)
        if accuracy > accuracy_threshold:
            break
    else:
        n_individuo += 1
        print(f"Individuo {n_individuo}")



Individuo 1
Epoch 1
Loss: 0.2815589236000231
OrderedDict([('fc1.weight', tensor([[ 0.0021, -0.0124,  0.0021,  ...,  0.0021, -0.0298,  0.0021],
        [ 0.0092,  0.0092,  0.0092,  ...,  0.0236, -0.0198,  0.0092],
        [ 0.0086,  0.0086,  0.0086,  ...,  0.0233,  0.0086,  0.0411],
        ...,
        [ 0.0059,  0.0243,  0.0059,  ...,  0.0059,  0.0385,  0.0059],
        [-0.0064, -0.0283, -0.0064,  ..., -0.0057, -0.0064, -0.0064],
        [ 0.0156,  0.0086,  0.0009,  ...,  0.0009,  0.0009,  0.0009]],
       device='cuda:0')), ('fc1.bias', tensor([-2.1335e-03, -9.2309e-03, -8.6142e-03,  6.6739e-03, -9.2162e-03,
        -7.2078e-03,  1.7570e-02, -3.2669e-03,  5.3182e-04, -8.6435e-03,
         1.5935e-03, -1.0343e-02,  1.6168e-03, -2.0332e-02, -6.4627e-03,
        -1.2016e-03, -1.1362e-02, -2.5403e-03, -3.4368e-02,  4.0939e-03,
         6.4165e-03, -3.7959e-04,  2.9554e-02,  1.4121e-03,  2.2137e-02,
        -1.6450e-03, -7.0992e-03, -2.7039e-03, -1.9772e-04,  4.1743e-02,
        -8.4263e

In [8]:
print(train_loss)
print(test_accuracies)


[0.2815589236000231, 0.10272867811534966, 0.07124243268389692, 0.05663462126389769, 0.04534182917935573, 0.03713084293581145, 0.03211992700904871, 0.028539591061417013, 0.02555766844126337, 0.07565050158778484]
[0.9774]
