In [3]:
%load_ext autoreload
%autoreload 2

In [4]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

In [3]:
# Seed NumPy's global RNG so the sampled caps and datasets are repeatable.
np.random.seed(42)

# How many distinct prizes there are.
n = 5

# Per-prize upper bounds on collection: one integer per prize, each drawn
# from 1 to 5 (randint's upper bound of 6 is exclusive).
d = np.random.randint(low=1, high=6, size=n)

# Define the reward function
def reward_function(x, d):
    """Capped reward: each collected amount counts only up to its bound.

    x: array of collected amounts (shape: (n,))
    d: array of upper bounds (shape: (n,))

    Returns the sum of the elementwise minimum of x and d.
    """
    capped = np.minimum(x, d)
    return capped.sum()

def modular_reward_function(x, d):
    """Uncapped (modular) reward: the plain total of the collected amounts.

    d is part of the signature but is not read by this function.
    """
    return np.asarray(x).sum()

# Function to generate a dataset
def generate_dataset(num_samples, d, extra=3, modular=False):
    """Draw random integer collection vectors and compute a reward for each.

    num_samples: number of rows to generate.
    d: per-prize upper bounds; its length fixes the number of columns.
    extra: how far above d[j] a sampled amount may go.
    modular: if true, score rows with modular_reward_function, otherwise
        with reward_function.

    Returns (X, y): X is an int array of shape (num_samples, len(d)) and
    y is an int array of shape (num_samples,) holding one reward per row.
    """
    num_prizes = len(d)
    X = np.zeros((num_samples, num_prizes), dtype=int)
    y = np.zeros(num_samples, dtype=int)
    score = modular_reward_function if modular else reward_function

    for row in range(num_samples):
        # randint's upper bound is exclusive, so + 1 makes d[j] + extra reachable.
        draws = [np.random.randint(0, cap + extra + 1) for cap in d]
        X[row, :] = draws
        y[row] = score(draws, d)

    return X, y

# Sizes of the two splits
num_train = 100
num_test = 30

# Both splits are labelled with the modular (uncapped) reward.
X_train, y_train = generate_dataset(num_train, d, modular=True)
X_test, y_test = generate_dataset(num_test, d, modular=True)

# Preview the first five rows of each split next to their rewards.
print("\nFirst 5 training samples (X_train and corresponding rewards):")
for i, (row, reward) in enumerate(zip(X_train[:5], y_train[:5])):
    print("X_train[{}] = {}, reward = {}".format(i, row, reward))

print("\nFirst 5 testing samples (X_test and corresponding rewards):")
for i, (row, reward) in enumerate(zip(X_test[:5], y_test[:5])):
    print("X_test[{}] = {}, reward = {}".format(i, row, reward))



First 5 training samples (X_train and corresponding rewards):
X_train[0] = [6 2 6 7 4], reward = 25
X_train[1] = [3 7 2 5 4], reward = 21
X_train[2] = [1 7 3 5 1], reward = 17
X_train[3] = [7 4 0 5 8], reward = 24
X_train[4] = [0 2 3 6 3], reward = 14

First 5 testing samples (X_test and corresponding rewards):
X_test[0] = [4 8 3 4 8], reward = 27
X_test[1] = [7 2 0 2 3], reward = 14
X_test[2] = [1 0 6 7 6], reward = 20
X_test[3] = [5 4 3 0 6], reward = 18
X_test[4] = [4 6 0 2 8], reward = 20


In [4]:
from dqn import MonotoneSubmodularNet
# Hyperparameters
learning_rate = 1e-2
num_epochs = 100
batch_size = 1

# The meaning of the constructor arguments is defined in dqn.MonotoneSubmodularNet
# (not visible in this notebook).
model = MonotoneSubmodularNet([1, 10, 1], 0.5, 1, 1)


def _as_float_tensor(array, as_column=False):
    """Copy a NumPy array into a float32 tensor, optionally adding a trailing axis."""
    tensor = torch.tensor(array, dtype=torch.float32)
    return tensor.unsqueeze(1) if as_column else tensor


# Features keep their (N, n) layout; targets become (N, 1) columns so they
# line up with the (batch, 1) outputs printed below.
X_train_tensor = _as_float_tensor(X_train)
y_train_tensor = _as_float_tensor(y_train, as_column=True)
X_test_tensor = _as_float_tensor(X_test)
y_test_tensor = _as_float_tensor(y_test, as_column=True)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training batches are reshuffled on every pass; test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# Sanity check before training: run every training batch through the
# untrained model and show the batch shape, the input and the output.
for batch_X, batch_y in train_loader:
    outputs = model(batch_X)
    print(batch_X.shape, "x", batch_X, "outputs", outputs, sep="\n")

torch.Size([1, 5])
x
tensor([[3., 7., 2., 5., 4.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[0., 3., 0., 0., 3.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[3., 0., 1., 0., 4.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[0., 7., 3., 1., 0.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[7., 4., 0., 5., 8.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[2., 8., 0., 1., 6.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[1., 3., 5., 7., 6.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[7., 8., 5., 4., 0.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[6., 5., 5., 5., 5.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size([1, 5])
x
tensor([[2., 4., 0., 7., 4.]])
outputs
tensor([[0.]], grad_fn=<SumBackward1>)
torch.Size

In [7]:
# Train the model on the reward dataset, logging weight and gradient
# histograms to TensorBoard once per epoch and printing train/test MSE.
writer = SummaryWriter(log_dir='runs/submodular_net')

try:
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            # Weight each batch loss by its size so the epoch figure is a
            # per-sample mean regardless of batch_size.
            running_loss += loss.item() * batch_X.size(0)
            # Re-apply the model's weight constraint after every update
            # (clamp_weights is defined in dqn; presumably it projects the
            # weights back into the allowed range - confirm there).
            model.clamp_weights()

        epoch_loss = running_loss / len(train_dataset)

        # Histograms reflect the state after the last batch of the epoch.
        for name, param in model.named_parameters():
            if 'weight' in name:
                writer.add_histogram(f'weights_{name}', param, epoch)
            if param.grad is not None:
                writer.add_histogram(f'gradients_{name}', param.grad, epoch)

        # Evaluate on test set
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for batch_X, batch_y in test_loader:
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                test_loss += loss.item() * batch_X.size(0)
        test_loss = test_loss / len(test_dataset)

        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}")
finally:
    # Flush and release the event file even if training is interrupted,
    # so the logged histograms are not lost.
    writer.close()

Epoch 1/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 2/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 3/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 4/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 5/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 6/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 7/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 8/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 9/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 10/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 11/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 12/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 13/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 14/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 15/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 16/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 17/100 - Train Loss: 358.4200, Test Loss: 409.9000
Epoch 18/100 - Train Loss: 358.4200, Tes

In [None]:
# List the names of the model's registered parameters, one per line.
for param_name, _ignored in model.named_parameters():
    print(param_name)

In [None]:
# Per-sample inspection of the test set: print each input, the model's
# prediction and the target, then report the mean loss.
# The accumulator must start from zero here: the training cell leaves an
# already-averaged value in `test_loss`, and on a fresh kernel the name
# would not exist at all.
test_loss = 0.0
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        print(batch_X)
        print(outputs, batch_y)
        # Weight by batch size so the final figure is a per-sample mean.
        test_loss += loss.item() * batch_X.size(0)

test_loss = test_loss / len(test_dataset)
print(f"Test Loss: {test_loss:.4f}")

In [None]:
# Probe the model with one hand-written sample: 100 in the first two
# columns and 0 in the other three.
example_input = torch.tensor([[100.0, 100.0, 0.0, 0.0, 0.0]], dtype=torch.float32)
model(example_input)  # forward pass only; the result is neither stored nor shown

# Show the raw weights of the first entry of model.m
# (model.m is defined in dqn - presumably a container of sub-layers; confirm there).
first_entry = model.m[0]
print(first_entry.weight.data)

In [None]:
from dqn import IncreasingConcaveNet
import matplotlib.pyplot as plt

# Fit IncreasingConcaveNet to a known increasing concave curve (log x),
# logging weight and gradient histograms to TensorBoard, then plot the fit.
writer = SummaryWriter(log_dir='runs/increasing_concave_net')

# Define a known increasing concave function
def true_function(x):
    """Target curve for the fit: the elementwise natural log of x.

    No offset is applied, so x must be strictly positive for the result
    to be finite.
    """
    return torch.log(x)

# Generate training data; the grid starts at 0.1 so log(x) stays finite.
# NOTE(review): `y_train` rebinds the name that earlier held the NumPy reward
# labels. Loaders built before this cell are unaffected, but re-running the
# cell that builds the reward tensors afterwards would pick up this curve.
x_train = torch.linspace(0.1, 10, 100).unsqueeze(1)  # shape (100, 1)
y_train = true_function(x_train)

# Instantiate the model
model = IncreasingConcaveNet([1, 32, 32, 32, 1], 1.0)
model.clamp_weights()  # clamp initial weights

# Optimizer and loss
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

# Training loop (full-batch: every step uses all 100 points)
try:
    for epoch in range(1000):
        optimizer.zero_grad()
        y_pred = model(x_train)
        loss = loss_fn(y_pred, y_train)
        loss.backward()
        optimizer.step()
        # Re-apply the model's weight constraint after each update.
        model.clamp_weights()

        for name, param in model.named_parameters():
            if 'weight' in name:
                writer.add_histogram(f'{name}_weights', param, epoch)
            if param.grad is not None:
                writer.add_histogram(f'gradients_{name}', param.grad, epoch)
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.6f}")
finally:
    # Flush and release the event file even if training is interrupted.
    writer.close()


# Plot the result
with torch.no_grad():
    y_pred = model(x_train)
    plt.plot(x_train.numpy(), y_train.numpy(), label='True function')
    plt.plot(x_train.numpy(), y_pred.numpy(), label='Model output')
    plt.legend()
    plt.title("Fitting an Increasing Concave Function")
    plt.xlabel("x")
    plt.ylabel("f(x)")
    plt.show()

In [None]:
# Print the name of every registered parameter of the current model.
for entry in model.named_parameters():
    print(entry[0])

In [None]:
from dqn import IncreasingConcaveNet
# Train a 5-input IncreasingConcaveNet on the reward dataset, reusing the
# train/test loaders built earlier in the notebook.
num_epochs = 50
learning_rate = 1e-2

model = IncreasingConcaveNet([5, 32, 32, 32, 1], 1.0)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # ---- optimisation pass over the training set ----
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch figure is a per-sample mean.
        # model.clamp_weights() is left disabled in this run.
        running_loss += loss.item() * batch_X.shape[0]

    epoch_loss = running_loss / len(train_dataset)

    # ---- evaluation pass over the test set (no gradients) ----
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            test_loss += loss.item() * batch_X.shape[0]
    test_loss /= len(test_dataset)

    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}")

In [None]:
# Per-sample inspection of the test set for the current model: print each
# input, the prediction and the target, then report the mean loss.
# Reset the accumulator first: the training cell leaves an already-averaged
# value in `test_loss`, and on a fresh kernel the name would be undefined.
test_loss = 0.0
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        print(batch_X)
        print(outputs, batch_y)
        # Weight by batch size so the final figure is a per-sample mean.
        test_loss += loss.item() * batch_X.size(0)

test_loss = test_loss / len(test_dataset)
print(f"Test Loss: {test_loss:.4f}")

In [12]:
def generate_dataset(num_samples, d, extra=3, modular=False):
    """Build a random dataset of collection vectors with one reward per row.

    Column j of every row is an integer drawn from 0 to d[j] + extra
    inclusive. Rows are scored with modular_reward_function when `modular`
    is true and with reward_function otherwise.

    Returns (X, y): an int array of shape (num_samples, len(d)) and an int
    array of shape (num_samples,).
    """
    X = np.zeros((num_samples, len(d)), dtype=int)
    y = np.zeros(num_samples, dtype=int)

    for idx in range(num_samples):
        # randint excludes its upper bound, so the bound is d[j] + extra + 1.
        exclusive_bounds = (cap + extra + 1 for cap in d)
        sample = [np.random.randint(0, bound) for bound in exclusive_bounds]
        X[idx, :] = sample
        y[idx] = (
            modular_reward_function(sample, d)
            if modular
            else reward_function(sample, d)
        )

    return X, y

# Define the reward function
def reward_function(x, d):
    """Sum of the collected amounts after capping each one at its bound.

    x: array of collected amounts (shape: (n,))
    d: array of upper bounds (shape: (n,))
    """
    return np.sum(np.minimum(x, d), axis=None)

def modular_reward_function(x, d):
    """Sum of all collected amounts with no capping; d is not read."""
    total = np.sum(x, axis=None)
    return total

# Small demo of the capped reward: three prizes, ten samples.
# Note that this rebinds the notebook-level name `d`.
d = np.random.randint(low=1, high=6, size=3)
X, y = generate_dataset(10, d)  # modular defaults to False -> capped reward

# Show the caps, the sampled amounts and the resulting rewards.
print(d, X, y, sep="\n")

[2 4 5]
[[3 3 6]
 [2 2 5]
 [3 5 3]
 [3 2 1]
 [5 5 0]
 [3 5 7]
 [5 2 2]
 [5 2 6]
 [5 6 7]
 [1 2 4]]
[10  9  9  5  6 11  6  9 11  7]
