La idea es implementar una red neuronal que tenga como entrada el vector $u_i$ y como salida: $c$, $A$ y $b$. Que son los parametros del modelo de optimizacion. La funcion de perdida para entrenar la red neuronal sera la siguiente:

$$
\mathcal{L} = \frac{1}{N} \sum_{i=1}^{N} \left\| x_i - \hat{x}_i \right\|_2^2
$$
donde $\hat{x}_i$ es la solucion del modelo de optimizacion con parametros $c$, $A$ y $b$.

In [5]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

In [17]:
model.parameters()

<generator object Module.parameters at 0x00000222B2D4B060>

In [12]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [13]:
# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()

In [14]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [15]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [16]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.296704  [   64/60000]
loss: 2.286694  [ 6464/60000]
loss: 2.268586  [12864/60000]
loss: 2.268380  [19264/60000]
loss: 2.255245  [25664/60000]
loss: 2.215268  [32064/60000]
loss: 2.235418  [38464/60000]
loss: 2.189306  [44864/60000]
loss: 2.192062  [51264/60000]
loss: 2.164429  [57664/60000]
Test Error: 
 Accuracy: 44.3%, Avg loss: 2.156784 

Epoch 2
-------------------------------
loss: 2.165854  [   64/60000]
loss: 2.156527  [ 6464/60000]
loss: 2.095820  [12864/60000]
loss: 2.117990  [19264/60000]
loss: 2.084201  [25664/60000]
loss: 2.010549  [32064/60000]
loss: 2.048114  [38464/60000]
loss: 1.958934  [44864/60000]
loss: 1.962681  [51264/60000]
loss: 1.902870  [57664/60000]
Test Error: 
 Accuracy: 60.0%, Avg loss: 1.893457 

Epoch 3
-------------------------------
loss: 1.920831  [   64/60000]
loss: 1.897100  [ 6464/60000]
loss: 1.772069  [12864/60000]
loss: 1.818732  [19264/60000]
loss: 1.739486  [25664/60000]
loss: 1.665547  [32064/600

funcion loss personalizada:

In [18]:
def my_loss(output, target):
    loss = torch.mean((output - target)**2)
    return loss

model = nn.Linear(2, 2)
x = torch.randn(1, 2)
target = torch.randn(1, 2)
output = model(x)
loss = my_loss(output, target)
loss.backward()
print(model.weight.grad)

tensor([[0.0005, 0.0009],
        [0.0310, 0.0484]])


In [12]:
import numpy as np
import deep_inv_opt as io
import deep_inv_opt.plot as iop
import deep_inv_opt.linprog

In [3]:
# ahora mi funcion de perdida adaptada a mi modelo
def my_loss(output, target):
    # output es la salida de la red neuronal, que me devuelve la matriz A y el vector b
    # y el vector c de coste
    c = output[0:2]
    A = output[2:6].reshape(2, 2)
    b = output[6:8]
    rs = solver.linprog(c, A, b)
    loss = torch.sum((rs-target)**2)
    return loss

In [56]:
# voy a probar si funciona bien linprog
c = torch.tensor([1.0, -1.0])
A = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
b = torch.tensor([[1.0], [1.0]])

In [53]:
deep_inv_opt.linprog(c, A, b)

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [54]:
def smooth_lp(c, A, b):
    # x inicial aleatorio
    x = torch.zeros(A.shape[1], requires_grad=True)

    # Definir el optimizador
    optimizer = torch.optim.SGD([x], lr=0.01)

    # Optimizar para encontrar x que minimiza c^T x sujeto a Ax <= b
    for _ in range(100):  # Pequeño número de pasos
        optimizer.zero_grad()

        # Penalizar violación de restricciones
        constraint_penalty = torch.sum(torch.relu(A @ x - b))
        
        # Función objetivo relajada
        objective = torch.dot(c, x) + 100.0 * constraint_penalty
        objective.backward()
        optimizer.step()

    return x.detach()

def my_loss(output, target):
    c = output[0:2]
    A = output[2:6].reshape(2, 2)
    b = output[6:8]

    # Resolver el LP suavizado
    rs = smooth_lp(c, A, b)
    loss = torch.sum((rs - target) ** 2)
    return loss


In [None]:
smooth_lp(c, A, b)

(tensor([-1.0000,  1.0000]), tensor([[1., 1.]]))

In [64]:
A @ smooth_lp(c, A, b) - b.t()

tensor([[-2.0000e+00, -6.5565e-07]])

voy a probar a ver si funciona con la funcion de perdida de la competencia

In [49]:
import torch
from torch.utils.data import Dataset, DataLoader

# Crear un dataset simple de vectores u
class UDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Dataset de entrada (vectores u)
u_data = [[1.0], [2.0], [3.0], [4.0]]
dataset = UDataset(u_data)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [50]:
import torch.nn as nn

class ParametricLPNet(nn.Module):
    def __init__(self):
        super(ParametricLPNet, self).__init__()
        # Entrada de dimensión 1, salida 8 (2 para c, 4 para A, 2 para b)
        self.fc = nn.Sequential(
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, 8)  # c (2), A (4), b (2)
        )

    def forward(self, u):
        output = self.fc(u)
        c = output[:, 0:2]      # Vector de costes
        A = output[:, 2:6].reshape(-1, 2, 2)  # Matriz A (2x2)
        b = output[:, 6:8]      # Vector de restricciones
        return c, A, b


In [None]:
def smooth_lp(c, A, b):
    # Inicializar x con gradientes habilitados
    x = torch.zeros(A.shape[1], requires_grad=True)

    optimizer = torch.optim.SGD([x], lr=0.01)

    for _ in range(100):
        optimizer.zero_grad()
        constraint_penalty = torch.sum(torch.relu(A @ x - b))
        objective = torch.dot(c, x) + 100.0 * constraint_penalty
        objective.backward(retain_graph=True)  # Mantén el grafo activo
        optimizer.step()
    return x  # Sin detach()


In [52]:

def my_loss(c, A, b, target):
    rs = smooth_lp(c, A, b)
    loss = torch.sum((rs - target) ** 2)
    return loss


In [53]:
# Crear la red neuronal
model = ParametricLPNet()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Valores objetivo de ejemplo (target), en mi caso son los valores de x
target = torch.tensor([1.0, 1.5])

# Entrenamiento
for epoch in range(100):
    for u_batch in dataloader:
        c, A, b = model(u_batch)

        # Calcular la pérdida
        loss = my_loss(c[0], A[0], b[0], target)

        # Backpropagation y optimización
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")


Epoch 0, Loss: 0.8327734470367432
Epoch 10, Loss: 0.8327734470367432
Epoch 20, Loss: 0.2942815124988556
Epoch 30, Loss: 2.3538384437561035
Epoch 40, Loss: 0.2942815124988556
Epoch 50, Loss: 2.7194652557373047
Epoch 60, Loss: 0.8327734470367432
Epoch 70, Loss: 2.7194652557373047
Epoch 80, Loss: 0.2942815124988556
Epoch 90, Loss: 0.8327734470367432


todo junto y con una buena estructura:

In [59]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import numpy as np

# Dataset simple de vectores u
class UDataset(Dataset):
    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], torch.tensor([1.0, 1.5])  # El target es fijo en tu caso

# Dataset y DataLoader
u_data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0], [7.0], [8.0], [9.0], [10.0], [11.0], [12.0]]
# genero un dataset u aleatorio y grande en el que cada elemento es un tensor de 1x1
# u_data = np.random.rand(1000, 1)
# u_data = [[u] for u in u_data]

train_dataloader = DataLoader(UDataset(u_data), batch_size=2, shuffle=True)

class ParametricLPNet(nn.Module):
    def __init__(self):
        super(ParametricLPNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(1, 16),
            nn.ReLU(),
            nn.Linear(16, 8)
        )

    def forward(self, u):
        output = self.fc(u)
        c = output[:, 0:2]
        A = output[:, 2:6].reshape(-1, 2, 2)
        b = output[:, 6:8]
        return c, A, b

def smooth_lp(c, A, b):
    x = torch.zeros(A.shape[1], requires_grad=True)
    optimizer = torch.optim.SGD([x], lr=0.01)

    for _ in range(100):
        optimizer.zero_grad()
        constraint_penalty = torch.sum(torch.relu(A @ x - b))
        objective = torch.dot(c, x) + 100.0 * constraint_penalty
        objective.backward(retain_graph=True)
        optimizer.step()
    return x

def my_loss(c, A, b, target):
    rs = smooth_lp(c, A, b)
    loss = torch.sum((rs - target) ** 2)
    return loss

# Entrenamiento
def train_loop(dataloader, model, loss_fn, optimizer):
    model.train()
    for batch, (u_batch, target) in enumerate(dataloader):
        c, A, b = model(u_batch)

        # Calcular la pérdida
        loss = loss_fn(c[0], A[0], b[0], target)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            print(f"Loss: {loss.item():>7f}  [{batch * len(u_batch):>5d}/{len(dataloader.dataset):>5d}]")

def test_loop(dataloader, model, loss_fn):
    model.eval()
    test_loss = 0

    with torch.no_grad():
        for u_batch, target in dataloader:
            c, A, b = model(u_batch)
            test_loss += loss_fn(c[0], A[0], b[0], target).item()

    test_loss /= len(dataloader)
    print(f"Test Avg loss: {test_loss:>8f}\n")

# Configuración
learning_rate = 1e-3
epochs = 10
model = ParametricLPNet()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Entrenamiento y evaluación
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, my_loss, optimizer)
    # test_loop(train_dataloader, model, my_loss)

print("Done!")


Epoch 1
-------------------------------
Loss: 95.222305  [    0/   12]
Epoch 2
-------------------------------
Loss: 12.245973  [    0/   12]
Epoch 3
-------------------------------
Loss: 8.683558  [    0/   12]
Epoch 4
-------------------------------
Loss: 12.245973  [    0/   12]
Epoch 5
-------------------------------
Loss: 15.968550  [    0/   12]
Epoch 6
-------------------------------
Loss: 49.857544  [    0/   12]
Epoch 7
-------------------------------
Loss: 12.245973  [    0/   12]
Epoch 8
-------------------------------
Loss: 10.028285  [    0/   12]
Epoch 9
-------------------------------
Loss: 95.222305  [    0/   12]
Epoch 10
-------------------------------
Loss: 8.683558  [    0/   12]
Done!


In [61]:
for u in dataloader:
    print(u)

tensor([[3.],
        [1.]])
tensor([[2.],
        [4.]])
