In [1]:
%load_ext autoreload
%autoreload 2
%pdb

Automatic pdb calling has been turned ON


In [None]:
from dss.env import FashionMNISTEnv 
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from torch.utils.data import Dataset

In [None]:
# Create the RL environment
env = DummyVecEnv([lambda: FashionMNISTEnv()])

# Train the policy using PPO
agent = PPO("MlpPolicy", env, verbose=1)
agent.learn(total_timesteps=int(1e5))

Using cuda device
-----------------------------
| time/              |      |
|    fps             | 515  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 437          |
|    iterations           | 2            |
|    time_elapsed         | 9            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.009111204  |
|    clip_fraction        | 0.0635       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.686       |
|    explained_variance   | -0.005438447 |
|    learning_rate        | 0.0003       |
|    loss                 | 0.407        |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.00858     |
|    value_loss           | 5.08         |
-----------------------------------------

<stable_baselines3.ppo.ppo.PPO at 0x7f2d23fd7160>

# Eval

In [22]:
# Create the RL environment
env = DummyVecEnv([lambda: FashionMNISTEnv()])
X = []
y = []
obs = env.reset()
done = False
actions = []
while not done:
    action, _states = agent.predict(obs, deterministic=True)
    action = [1 if a == 0 else 0 for a in action]
    obs, reward, done, info = env.step(action)
    if len(env.envs[0].samples) > 0:
       X = env.envs[0].samples
       y = env.envs[0].labels

In [23]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms
import numpy as np

class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        return self.fc(x)

class SimpleDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx].squeeze(0), self.y[idx].squeeze(0)

def test_model(model):
    model.eval()
    test_loader = torch.utils.data.DataLoader(
        datasets.FashionMNIST(root='./data', train=False, transform=transforms.ToTensor()),
        batch_size=1000, shuffle=False
    )

    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data.to('cuda'))
            pred = output.argmax(dim=1, keepdim=True).to('cpu')
            correct += pred.eq(target.view_as(pred)).sum().item()

    print(f'Test Accuracy: {correct / len(test_loader.dataset) * 100:.2f}%')
    return correct / len(test_loader.dataset)

eval_runs = 10
accs = []
for i in range(eval_runs):
    train_dataset = SimpleDataset(X, y)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Initialize model, optimizer, and environment
    model = SimpleNet()
    model.to('cuda')
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    count = 0
    for epoch in range(5):
        for data in train_loader:
            image, label = data
            output = model(image.to('cuda'))
            loss = nn.CrossEntropyLoss()(output, label.to('cuda'))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            count += 1

    acc = test_model(model)
    accs.append(acc)

np.mean(accs), np.std(accs)

Test Accuracy: 83.33%
Test Accuracy: 81.21%
Test Accuracy: 78.62%
Test Accuracy: 82.81%
Test Accuracy: 82.00%
Test Accuracy: 81.68%
Test Accuracy: 81.25%
Test Accuracy: 82.69%
Test Accuracy: 83.40%
Test Accuracy: 80.91%


(np.float64(0.8179000000000001), np.float64(0.013535730493770909))

In [16]:
eval_runs = 10
accs = []
for i in range(eval_runs):
    train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transforms.ToTensor())
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Initialize model, optimizer, and environment
    model = SimpleNet()
    model.to('cuda')
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    count = 0
    for epoch in range(5):
        for data in train_loader:
            image, label = data
            output = model(image.to('cuda'))
            loss = nn.CrossEntropyLoss()(output, label.to('cuda'))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            count += 1
            if count >= int(.2 * len(train_loader)):
                break

    acc = test_model(model)
    accs.append(acc)
np.mean(accs), np.std(accs)

Test Accuracy: 78.06%
Test Accuracy: 81.01%
Test Accuracy: 75.60%
Test Accuracy: 78.82%
Test Accuracy: 76.78%
Test Accuracy: 80.73%
Test Accuracy: 80.28%
Test Accuracy: 77.43%
Test Accuracy: 79.29%
Test Accuracy: 79.36%


(np.float64(0.78736), np.float64(0.01675035522011399))