In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Sampler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random


# from torchmetrics import Accuracy

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device", flush=True)

seed: int = 0
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

epochs = 1000
learning_rate = 1e-3
batch_size = 16

Using cpu device


In [2]:
# load the data in the output folder
# outdir = "/Users/ens/repos/marl/output/"
outdir = "output/"
data = np.load(outdir + "_trainingdata_groundmodel_exploit_True_numepi10000_K10_L10_M2_N10_T10.npy", allow_pickle=True).item()


In [3]:
data.keys()

dict_keys(['exploit_mode', 'episode_length', 'num_episodes', 'num_seeds', 'sys_parameters', 'times', 'states', 'actions'])

In [4]:
len(data["states"][0])

100000

In [5]:
states = data["states"][0]

In [6]:
states[0].shape

(10,)

In [7]:
actions = data["actions"][0]

In [8]:
actions[0].shape

(10,)

In [9]:
n_input = states[0].shape[0]
n_hidden = 256
n_out = actions[0].shape[0]

In [10]:
n_input, n_hidden, n_out

(10, 256, 10)

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_out):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(n_input, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

net = Net(n_input, n_hidden, n_out)
net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [12]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, states, actions):
        self.states = states
        self.actions = actions

    def __len__(self):
        return len(self.states)

    def __getitem__(self, idx):
        return self.states[idx], self.actions[idx]
    
dataset = CustomDataset(states, actions)

train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [14]:
logging_loss = [] 
for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.float().to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, loss: {running_loss/len(train_loader)}", flush=True)
    logging_loss.append(running_loss/len(train_loader))
torch.save(net.state_dict(), outdir + "groundmodel.pt")

Epoch 1, loss: 16.965213501281738
Epoch 2, loss: 16.960260276641847
Epoch 3, loss: 16.954902639923095
Epoch 4, loss: 16.949554048461913
Epoch 5, loss: 16.945407716827393
Epoch 6, loss: 16.941262779083253
Epoch 7, loss: 16.938545353851318
Epoch 8, loss: 16.934526713256837
Epoch 9, loss: 16.92930633804321
Epoch 10, loss: 16.928303722534178
Epoch 11, loss: 16.922564178009033
Epoch 12, loss: 16.920001546020508
Epoch 13, loss: 16.917760500183107
Epoch 14, loss: 16.914194700164796
Epoch 15, loss: 16.9124073197937
Epoch 16, loss: 16.908393014831542
Epoch 17, loss: 16.905182921600343
Epoch 18, loss: 16.903006777496337
Epoch 19, loss: 16.901918469543457
Epoch 20, loss: 16.89907569503784
Epoch 21, loss: 16.896977068786622
Epoch 22, loss: 16.894476890563965
Epoch 23, loss: 16.89324323196411
Epoch 24, loss: 16.89026269378662
Epoch 25, loss: 16.887078640136718
Epoch 26, loss: 16.886630764007567
Epoch 27, loss: 16.885871540374755
Epoch 28, loss: 16.882175700531008
Epoch 29, loss: 16.88114379043579
E

KeyboardInterrupt: 

In [None]:
# net = Net(n_input, n_hidden, n_out)
# net.load_state_dict(torch.load(outdir + "groundmodel.pt"))
# net.eval()

Evaluation on arbitrarily large dataset

Compute Irreducible loss by using groundmodel action probabilities on data (could also add this to data generation script)

In [None]:
outputs = net(inputs)
loss = criterion(outputs, labels)