<a href="https://colab.research.google.com/github/cabamarcos/SuperMask/blob/main/Intentos_keras/Keras_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [83]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical

import numpy as np
import matplotlib.pyplot as plt
import copy
from ast import Param

In [84]:
# Verificar si la GPU está disponible y establecer el dispositivo
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


Cargamos los datos, los normalizamo sy los ponemos en one-hot

In [85]:
# Cargar el conjunto de datos MNIST
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalizar los datos
x_train = x_train / 255.0
x_test = x_test / 255.0

# Convertir las etiquetas a formato one-hot
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

Creamos las redes

In [86]:
# Definir la función para crear el modelo
def create_model():
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    return model

In [87]:
# Crear dos modelos con la misma estructura
net = create_model()
mask = create_model()

In [88]:
print(net.summary())

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_10 (Flatten)        (None, 784)               0         
                                                                 
 dense_20 (Dense)            (None, 128)               100480    
                                                                 
 dense_21 (Dense)            (None, 10)                1290      
                                                                 
Total params: 101770 (397.54 KB)
Trainable params: 101770 (397.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [89]:
def save_model_parameters_to_file(model, model_name, filename):
    with open(filename, 'w') as f:
        f.write(f"Parameters of model: {model_name}\n")
        for layer in model.layers:
            if isinstance(layer, Dense):
                weights, biases = layer.get_weights()
                f.write(f"Layer: {layer.name}\n")
                f.write("Weights:\n")
                np.savetxt(f, weights, fmt='%.4f')  # Guarda los pesos en el archivo
                f.write("Biases:\n")
                np.savetxt(f, biases, fmt='%.4f')  # Guarda los sesgos en el archivo
                f.write("\n")

In [90]:
# from utils.save_parameters import save_model_parameters_to_file
# # Guardar los parámetros en archivos
# save_model_parameters_to_file(net, "net", "net_parameters.txt")
# save_model_parameters_to_file(mask, "mask", "mask_parameters.txt")

In [91]:


def apply_mask(net, mask, percentage=30):
    # Obtiene los pesos de ambos modelos
    mask_weights = mask.get_weights()
    net_weights = net.get_weights()

    # Procesa cada capa
    new_weights = []
    for mw, nw in zip(mask_weights, net_weights):
        # Aplana los pesos para facilitar la manipulación
        mw_flat = mw.flatten()

        # Determina el umbral para el porcentaje dado
        threshold = np.percentile(mw_flat, 100 - percentage)

        # Crea la máscara con 1s para el porcentaje más alto y 0s para el porcentaje más bajo
        mask = np.where(mw >= threshold, 1, 0)

        # Aplica la máscara a los pesos de net
        new_w = nw * mask

        # Reshapea los pesos al formato original
        new_weights.append(new_w)

    # Asigna los nuevos pesos al modelo net
    net.set_weights(new_weights)


In [92]:
# apply_mask(net, mask)

In [93]:
# # Guardar los parámetros en archivos
# save_model_parameters_to_file(net, "net", "net_parameters_after.txt")
# save_model_parameters_to_file(mask, "mask", "mask_parameters_after.txt")

In [94]:
def verify(model):
    print("Verifying model:")
    for layer in model.layers:
        if isinstance(layer, Dense):
            weights, _ = layer.get_weights()
            total_weights = weights.size
            null_weights = np.sum(weights == 0)
            null_percentage = (null_weights / total_weights) * 100
            print(f"Layer: {layer.name}")
            print(f"Total weights: {total_weights}")
            print(f"Null weights: {null_weights}")
            print(f"Percentage of null weights: {null_percentage:.2f}%")
            print("\n")


In [95]:
verify(net)

Verifying model:
Layer: dense_20
Total weights: 100352
Null weights: 0
Percentage of null weights: 0.00%


Layer: dense_21
Total weights: 1280
Null weights: 0
Percentage of null weights: 0.00%




Optimizadores del código con función de pérdida

In [96]:
net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'loss'])
mask.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'loss'])

# Definir el optimizador para la red mask
optimizer_mask = tf.keras.optimizers.Adam()

# Definir la función de pérdida que se utilizará
loss_function = tf.keras.losses.CategoricalCrossentropy()

In [97]:
# Ensure the mask model's variables are trainable
for var in mask.trainable_variables:
    var._trainable = True

In [98]:
train_loss = []
test_accuracies = []
epochs = 10
accuracy_threshold = 0.6
original_weights = net.get_weights()

# Definir el tamaño del lote
batch_size = 32

# Bucle de entrenamiento
for epoch in range(epochs):
  print(f'Epoch {epoch + 1}/{epochs}')

  #aplicamos los pesos originales a net
  net.set_weights(original_weights)

  # Aplicamos la máscara a net
  apply_mask(net, mask)

  running_loss = 0.0
  #pasar los batches por net_masked
  for start in range(0, len(x_train), batch_size):
    end = min(start + batch_size, len(x_train))
    x_batch = x_train[start:end]
    y_batch = y_train[start:end]

    # Compute gradients of mask w.r.t loss of net_masked
    with tf.GradientTape() as tape_mask:
      predicted = net(x_batch)
      loss = loss_function(y_batch, predicted)

    # Ensure the tape is watching the correct variables
    tape_mask.watch(mask.trainable_variables)

    # Calcular los gradientes para actualizar la red mask
    grads_mask = tape_mask.gradient(loss, mask.trainable_variables)
    print(grads_mask)
    # Aplicar los gradientes a la red mask
    optimizer_mask.apply_gradients(zip(grads_mask, mask.trainable_variables))

    running_loss += loss.numpy()

  train_loss.append(running_loss / (len(x_train)/batch_size))

  # test en la red mask
  predictions = mask(x_test)
  accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
  test_accuracies.append(accuracy)

  print(f'Epoch {epoch + 1}, Loss: {running_loss / (len(x_train)/batch_size)}, Accuracy: {accuracy}')

  # Paramos el entrenamiento si la precisión en validación supera el 60%
  if accuracy > accuracy_threshold:
    break

Epoch 1/10
[None, None, None, None]


ValueError: No gradients provided for any variable: (['dense_22/kernel:0', 'dense_22/bias:0', 'dense_23/kernel:0', 'dense_23/bias:0'],). Provided `grads_and_vars` is ((None, <tf.Variable 'dense_22/kernel:0' shape=(784, 128) dtype=float32, numpy=
array([[ 0.04435576, -0.05257457, -0.06601036, ..., -0.05994757,
        -0.03457224,  0.0791545 ],
       [-0.07153916, -0.0288265 , -0.06801061, ...,  0.04388655,
        -0.06235685, -0.00575858],
       [ 0.01052658, -0.03648903, -0.07280458, ...,  0.02799095,
         0.0266562 ,  0.05452526],
       ...,
       [ 0.03057525, -0.0415886 , -0.05455653, ..., -0.03542001,
         0.05967753, -0.02094794],
       [ 0.02887145, -0.07213977, -0.0554945 , ...,  0.01381391,
         0.02687215, -0.07300194],
       [-0.02398338, -0.00298727,  0.0680036 , ...,  0.00156865,
        -0.02981233,  0.07828125]], dtype=float32)>), (None, <tf.Variable 'dense_22/bias:0' shape=(128,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>), (None, <tf.Variable 'dense_23/kernel:0' shape=(128, 10) dtype=float32, numpy=
array([[-0.18748824,  0.11599819, -0.04935299, ..., -0.09541741,
        -0.13119487, -0.10926009],
       [-0.11402769, -0.09944765,  0.01478684, ...,  0.17572515,
        -0.03331107,  0.01810832],
       [-0.03784253, -0.07226211, -0.13889036, ...,  0.2056237 ,
        -0.09866063,  0.04432331],
       ...,
       [ 0.17824091,  0.11549698, -0.17143296, ...,  0.03464176,
        -0.10110028, -0.1716152 ],
       [ 0.17962302, -0.15479481, -0.14668085, ...,  0.18476464,
        -0.17129643,  0.07770403],
       [ 0.06548627,  0.01642914,  0.15274598, ..., -0.1420272 ,
         0.07414104,  0.10200359]], dtype=float32)>), (None, <tf.Variable 'dense_23/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>)).