In [1]:
import gym
import gym.spaces

# Gym id of the Atari Pong environment; also passed to make_env() further down.
DEFAULT_ENV_NAME = "PongNoFrameskip-v4" 
# Unwrapped environment created here to print the number of discrete actions.
test_env = gym.make(DEFAULT_ENV_NAME)
print(test_env.action_space.n)

6


In [2]:
import warnings
# Silence every Python warning for the rest of the session (no category filter is given).
warnings.filterwarnings('ignore')

Environment Stuff

In [3]:
# Taken from 
# https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On/blob/master/Chapter06/lib/wrappers.py

import cv2
import numpy as np
import collections

class FireResetEnv(gym.Wrapper):
    """Wrapper that performs action 1 (FIRE) and then action 2 after every reset.

    The constructor asserts that the wrapped environment exposes at least
    three actions and that action 1 is named 'FIRE'.
    """

    def __init__(self, env=None):
        super(FireResetEnv, self).__init__(env)
        action_meanings = env.unwrapped.get_action_meanings()
        assert action_meanings[1] == 'FIRE'
        assert len(action_meanings) >= 3

    def step(self, action):
        """Forward the action to the wrapped environment unchanged."""
        return self.env.step(action)

    def reset(self):
        """Reset the environment, then take actions 1 and 2.

        :return: the observation the caller should start the episode from
        """
        self.env.reset()
        obs, _, done, _ = self.env.step(1)
        if done:
            # The episode ended on the FIRE step; start over before the next action.
            self.env.reset()
        obs, _, done, _ = self.env.step(2)
        if done:
            # Return the observation of the freshly reset episode rather than
            # the terminal frame of the episode that just ended.
            obs = self.env.reset()
        return obs

class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action `skip` times and return the pixel-wise max of the last two frames."""

    def __init__(self, env=None, skip=4):
        super(MaxAndSkipEnv, self).__init__(env)
        self._skip = skip
        # Holds at most the two most recent raw observations for max pooling.
        self._obs_buffer = collections.deque(maxlen=2)

    def step(self, action):
        """Apply `action` up to `skip` times, summing rewards and stopping early on `done`.

        :return: (max-pooled frame, summed reward, done flag, info of the last inner step)
        """
        accumulated_reward = 0.0
        done = None
        for _frame in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            self._obs_buffer.append(obs)
            accumulated_reward += reward
            if done:
                break
        stacked = np.stack(self._obs_buffer)
        pooled = np.max(stacked, axis=0)
        return pooled, accumulated_reward, done, info

    def reset(self):
        """Empty the frame buffer, reset the wrapped env and seed the buffer with its first frame."""
        self._obs_buffer.clear()
        first_obs = self.env.reset()
        self._obs_buffer.append(first_obs)
        return first_obs


class ProcessFrame84(gym.ObservationWrapper):
    """Convert RGB frames to a single-channel 84x84 uint8 image."""

    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(84, 84, 1), dtype=np.uint8
        )

    def observation(self, obs):
        return ProcessFrame84.process(obs)

    @staticmethod
    def process(frame):
        """Grayscale, resize to 84x110, crop rows 18..101 and return an (84, 84, 1) uint8 array.

        Only frames holding 210*160*3 or 250*160*3 values are accepted;
        anything else trips an assertion.
        """
        n_values = frame.size
        if n_values == 210 * 160 * 3:
            source_shape = [210, 160, 3]
        elif n_values == 250 * 160 * 3:
            source_shape = [250, 160, 3]
        else:
            assert False, "Unknown resolution."
        img = np.reshape(frame, source_shape).astype(np.float32)

        # Weighted sum of the three colour channels.
        red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
        gray = red * 0.299 + green * 0.587 + blue * 0.114

        # cv2 takes (width, height): the result is 110 rows by 84 columns.
        resized_screen = cv2.resize(gray, (84, 110), interpolation=cv2.INTER_AREA)
        cropped = resized_screen[18:102, :]
        return np.reshape(cropped, [84, 84, 1]).astype(np.uint8)


class BufferWrapper(gym.ObservationWrapper):
    """Keep a rolling buffer of the most recent observations along axis 0."""

    def __init__(self, env, n_steps, dtype=np.float32):
        super(BufferWrapper, self).__init__(env)
        self.dtype = dtype
        base_space = env.observation_space
        # Repeat the bounds of the wrapped space n_steps times along the first axis.
        stacked_low = base_space.low.repeat(n_steps, axis=0)
        stacked_high = base_space.high.repeat(n_steps, axis=0)
        self.observation_space = gym.spaces.Box(stacked_low, stacked_high, dtype=dtype)

    def reset(self):
        """Start from an all-zero buffer and push the first observation into it."""
        self.buffer = np.zeros_like(self.observation_space.low, dtype=self.dtype)
        first_obs = self.env.reset()
        return self.observation(first_obs)

    def observation(self, observation):
        """Shift the buffer by one slot and store `observation` in the last slot.

        The same buffer array is returned on every call (it is updated in place).
        """
        self.buffer[:-1] = self.buffer[1:]
        self.buffer[-1] = observation
        return self.buffer


class ImageToPyTorch(gym.ObservationWrapper):
    """Move the last image axis to the front (H, W, C -> C, H, W)."""

    def __init__(self, env):
        super(ImageToPyTorch, self).__init__(env)
        old_shape = self.observation_space.shape
        channels_first = (old_shape[-1], old_shape[0], old_shape[1])
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=channels_first, dtype=np.float32
        )

    def observation(self, observation):
        # Axis 2 becomes axis 0; the remaining axes keep their relative order.
        return np.moveaxis(observation, 2, 0)


class ScaledFloatFrame(gym.ObservationWrapper):
    """Convert observations to float32 and divide them by 255."""

    def observation(self, obs):
        as_float = np.array(obs).astype(np.float32)
        return as_float / 255.0

def make_env(env_name):
    """Create `env_name` and apply the preprocessing wrappers defined above.

    Order: frame skipping with max pooling, FIRE on reset, 84x84 grayscale,
    channels-first layout, a 4-step observation buffer, and finally scaling
    to float32 in [0, 1].
    """
    env = gym.make(env_name)
    single_arg_wrappers = (MaxAndSkipEnv, FireResetEnv, ProcessFrame84, ImageToPyTorch)
    for wrap in single_arg_wrappers:
        env = wrap(env)
    env = BufferWrapper(env, 4)
    return ScaledFloatFrame(env)

In [4]:
# Fully wrapped Pong environment; the data-generation cell below steps through it.
env = make_env(DEFAULT_ENV_NAME)


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import argparse
import matplotlib
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

from tqdm import tqdm
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.utils import save_image

import random
import numpy


# Use the 'ggplot' style sheet for all matplotlib figures created afterwards.
matplotlib.style.use('ggplot')

In [2]:
features = 16


class LinearVAE(nn.Module):
    """Simple fully connected VAE for flattened 84x84 frames.

    The architecture is that of a plain VAE; the beta weighting of the KL term
    is not part of the model (it is applied where the loss is computed).

    :param latent_dim: size of the latent vector; defaults to the module-level
        `features` value at class-definition time
    """

    def __init__(self, latent_dim=features):
        super(LinearVAE, self).__init__()
        # Keep the latent size on the instance so forward() stays consistent
        # with the layers below even if the global `features` is changed later.
        self.latent_dim = latent_dim

        # encoder: 84*84 = 7,056 inputs; the last layer emits mu and log_var together
        self.enc0 = nn.Linear(in_features=84*84, out_features=1024)
        self.enc1 = nn.Linear(in_features=1024, out_features=512)
        self.enc2 = nn.Linear(in_features=512, out_features=latent_dim*2)

        # decoder
        self.dec0 = nn.Linear(in_features=latent_dim, out_features=512)
        self.dec1 = nn.Linear(in_features=512, out_features=1024)
        self.dec2 = nn.Linear(in_features=1024, out_features=84*84)

    def reparameterize(self, mu, log_var):
        """
        Draw a latent sample with the reparameterization trick.

        :param mu: mean from the encoder's latent space
        :param log_var: log variance from the encoder's latent space
        :return: mu + eps * std with eps drawn from a standard normal
        """
        std = torch.exp(0.5*log_var) # standard deviation
        eps = torch.randn_like(std) # `randn_like` as we need the same size
        sample = mu + (eps * std) # sampling as if coming from the input space
        return sample

    def forward(self, x):
        """
        Encode `x`, sample a latent vector and decode it.

        Note that a random latent sample is drawn on every call, regardless of
        whether the module is in train or eval mode.

        :param x: batch of flattened frames, shape (batch, 84*84)
        :return: (reconstruction in [0, 1], mu, log_var)
        """
        # encoding
        x = F.relu(self.enc0(x))
        x = F.relu(self.enc1(x))

        # split the encoder output into two vectors of length latent_dim
        x = self.enc2(x).view(-1, 2, self.latent_dim)

        # get `mu` and `log_var`
        mu = x[:, 0, :] # the first feature values as mean
        log_var = x[:, 1, :] # the other feature values as log variance

        # get the latent vector through reparameterization
        z = self.reparameterize(mu, log_var)

        # decoding
        x = F.relu(self.dec0(z))
        x = F.relu(self.dec1(x))
        reconstruction = torch.sigmoid(self.dec2(x))
        return reconstruction, mu, log_var

Parameters for training

In [3]:
# learning parameters
epochs = 10  # number of passes of the training loop
train_games = 100  # games played when generating training frames
val_games = 20  # games played when generating validation frames
batch_size = 64
beta = 5  # multiplier on the KL term inside final_loss
lr = 0.0001  # learning rate handed to the Adam optimizer
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
import datetime
import os

# Current timestamp; its day and month tag the output folder of this run.
x = datetime.datetime.now()
newpath = f"C:/Users/erics/Documents/Programme/Bachelorarbeit/beat_VAE_Pong_runs/runLinearBeta5-5-5-NOT0_1_Data_Lat{features}Beta_VAE_pong"
#newpath = f"C:/Users/erics/Documents/Programme/Bachelorarbeit/beat_VAE_Pong_runs/run1Beta{beta}Lat{latentDim}"
newpath = newpath + f"/outputBeta{x.day}-{x.month}"

# exist_ok=True creates the folder if needed without a separate existence check,
# and raises if the path exists but is not a directory.
os.makedirs(newpath, exist_ok=True)

# Filename prefix for the per-epoch reconstruction images ("<prefix><epoch>.png").
savingDir = newpath + "/epoch"

In [6]:
def final_loss(mse_loss, mu, logvar, beta, kl_wheight):
    """
    Combine the reconstruction loss with a weighted KL-divergence term.

    KL-Divergence = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    :param mse_loss: reconstruction loss (already computed by the caller)
    :param mu: the mean from the latent vector
    :param logvar: log variance from the latent vector
    :param beta: multiplier applied to the KL term
    :param kl_wheight: additional scaling factor applied to the KL term
    :return: mse_loss + beta * kl_wheight * KL-Divergence
    """
    per_element = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_divergence = -0.5 * torch.sum(per_element)
    return mse_loss + beta*kl_wheight*kl_divergence

Load Data

In [8]:
# Load previously saved frame arrays from .npy files (paths are relative to the working directory).
train_data = numpy.load('train_data100kFEB23.npy')
val_data = numpy.load('val_data20kFEB23.npy')
# Alternative data files, currently disabled:
#train_data = numpy.load('train_data100kMAR22.npy')
#val_data = numpy.load('val_data20kMAR22.npy')

Generate Data

In [None]:
def _collect_latest_frames(environment, n_games, label, games_total):
    """Play `n_games` games with random actions and collect the newest frame of every step.

    :param environment: environment exposing reset() and step(action)
    :param n_games: number of games to play
    :param label: prefix of the progress message printed for each game
    :param games_total: total number shown at the end of the progress message
    :return: list with one frame per environment step
    """
    frames = []
    for game_idx in range(n_games):
        print(label + ": In game "+ str(game_idx) + " von Total" + str(games_total))
        environment.reset()
        done = False

        while not done:
            # get the data randomly: pick action 2 or 3 on every step
            action = random.choice([2,3])
            observation, reward, done, info = environment.step(action)

            # Index 3 is the last slot of the observation buffer. Store a copy:
            # a plain slice is a view that would keep the whole observation
            # array of every step alive.
            frames.append(observation[3].copy())
    return frames


# NOTE: this replaces any train_data / val_data that were loaded from disk earlier.
total_games = train_games + val_games
train_data = _collect_latest_frames(env, train_games, "Training Data", total_games)
val_data = _collect_latest_frames(env, val_games, "Validation Data", total_games)

In [None]:
# Number of training frames currently held in memory.
print(len(train_data))

In [None]:
import numpy


In [None]:
# Persist the collected frames. numpy.save appends ".npy" to the given name,
# which yields the filenames read in the "Load Data" cell.
numpy.save('train_data100kFEB23',train_data,allow_pickle =False)
numpy.save('val_data20kFEB23',val_data,allow_pickle =False)

Model

In [9]:
# transforms why do i need a transform?
# NOTE(review): `transform` is not passed to the DataLoaders in the cells visible
# here, so it appears to be unused — confirm before removing it.
transform = transforms.Compose([
    transforms.ToTensor(),
])

In [10]:
# Batches of frames for training (reshuffled every epoch) and for validation (fixed order).
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

In [11]:
# Model, optimizer and reconstruction criterion; fit() and validate() read
# `optimizer` and `criterion` as globals.
model = LinearVAE().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
# reduction='sum' adds up the squared error over all elements of the batch.
criterion = nn.MSELoss(reduction='sum')
#criterion = torch.nn.MSELoss(reduction = 'sum')
print(model)

LinearVAE(
  (enc0): Linear(in_features=7056, out_features=1024, bias=True)
  (enc1): Linear(in_features=1024, out_features=512, bias=True)
  (enc2): Linear(in_features=512, out_features=32, bias=True)
  (dec0): Linear(in_features=16, out_features=512, bias=True)
  (dec1): Linear(in_features=512, out_features=1024, bias=True)
  (dec2): Linear(in_features=1024, out_features=7056, bias=True)
)


Training loop. Note: the autoencoder is trained on a single frame from the frame buffer, not on the whole buffer. Training on the full buffer could be a useful extension.

In [12]:
def fit(model, dataloader):
    """Train `model` for one pass over `dataloader`.

    Relies on the notebook globals `device`, `optimizer`, `criterion`, `beta`
    and `final_loss`.

    :param model: the VAE to train; called as model(batch) -> (reconstruction, mu, logvar)
    :param dataloader: loader yielding batches of frames
    :return: summed loss divided by the number of samples in the loader's dataset
    """
    model.train()
    running_loss = 0.0
    # Take sizes from the loader that was passed in, not from a global dataset,
    # so the progress bar and the KL weight are correct for any loader.
    n_samples = len(dataloader.dataset)
    kl_weight = dataloader.batch_size / n_samples
    for data in tqdm(dataloader, total=len(dataloader)):
        data = data.to(device)
        # Flatten every sample to a vector.
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        reconstruction, mu, logvar = model(data)
        mse_loss = criterion(reconstruction, data)
        loss = final_loss(mse_loss, mu, logvar, beta, kl_wheight=kl_weight)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

    train_loss = running_loss / n_samples
    return train_loss

In [15]:
def validate(model, dataloader):
    """Evaluate `model` on `dataloader` and save a comparison image of the last batch.

    Relies on the notebook globals `device`, `criterion`, `beta`, `final_loss`,
    `savingDir` and `epoch` (the loop variable of the training loop, used in
    the image filename).

    :param model: the VAE to evaluate; called as model(batch) -> (reconstruction, mu, logvar)
    :param dataloader: loader yielding batches of frames
    :return: summed loss divided by the number of samples in the loader's dataset
    """
    model.eval()
    running_loss = 0.0
    # Take sizes from the loader that was passed in, not from a global dataset.
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    kl_weight = dataloader.batch_size / n_samples
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=n_batches):
            data = data.to(device)
            # Flatten every sample to a vector.
            data = data.view(data.size(0), -1)
            reconstruction, mu, logvar = model(data)
            mse_loss = criterion(reconstruction, data)
            loss = final_loss(mse_loss, mu, logvar, beta, kl_wheight=kl_weight)
            running_loss += loss.item()

            # save the last batch input and output of every epoch
            if i == n_batches - 1:
                # The last batch may be smaller than batch_size, so infer the
                # batch dimension and show at most 8 samples. One grid row of
                # inputs is followed by one row of reconstructions.
                num_rows = min(8, data.size(0))
                both = torch.cat((data.view(-1, 1, 84, 84)[:num_rows],
                                  reconstruction.view(-1, 1, 84, 84)[:num_rows]))
                save_image(both.cpu(), savingDir + f"{epoch}.png", nrow=num_rows)
    val_loss = running_loss / n_samples
    return val_loss

In [16]:
# Per-epoch loss history (one entry per epoch in each list).
train_loss = []
val_loss = []
# NOTE: `epoch` must stay a module-level name; validate() reads it to build
# the filename of the reconstruction image.
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss = fit(model, train_loader)
    val_epoch_loss = validate(model, val_loader)
    train_loss.append(train_epoch_loss)
    val_loss.append(val_epoch_loss)
    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f"Val Loss: {val_epoch_loss:.4f}")

Epoch 1 of 10


1592it [01:05, 24.23it/s]                                                                                              
328it [00:04, 81.99it/s]                                                                                               


Train Loss: 1.3441
Val Loss: 1.6240
Epoch 2 of 10


1592it [01:22, 19.34it/s]                                                                                              
328it [00:03, 98.17it/s]                                                                                               


Train Loss: 1.3061
Val Loss: 1.5448
Epoch 3 of 10


1592it [01:17, 20.42it/s]                                                                                              
328it [00:03, 100.06it/s]                                                                                              


Train Loss: 1.2505
Val Loss: 1.5488
Epoch 4 of 10


1592it [01:17, 20.44it/s]                                                                                              
328it [00:03, 98.16it/s]                                                                                               


Train Loss: 1.2043
Val Loss: 1.4466
Epoch 5 of 10


1592it [01:25, 18.66it/s]                                                                                              
328it [00:03, 89.82it/s]                                                                                               


Train Loss: 1.1458
Val Loss: 1.4087
Epoch 6 of 10


1592it [01:17, 20.57it/s]                                                                                              
328it [00:03, 98.53it/s]                                                                                               


Train Loss: 1.0944
Val Loss: 1.3440
Epoch 7 of 10


1592it [01:29, 17.74it/s]                                                                                              
328it [00:03, 99.80it/s]                                                                                               


Train Loss: 1.0619
Val Loss: 1.3226
Epoch 8 of 10


1592it [01:16, 20.69it/s]                                                                                              
328it [00:03, 86.95it/s]                                                                                               


Train Loss: 0.9727
Val Loss: 1.2226
Epoch 9 of 10


1592it [01:16, 20.77it/s]                                                                                              
328it [00:03, 101.71it/s]                                                                                              


Train Loss: 0.8619
Val Loss: 1.1706
Epoch 10 of 10


1592it [01:16, 20.84it/s]                                                                                              
328it [00:03, 100.91it/s]                                                                                              

Train Loss: 0.7946
Val Loss: 1.1089





In [28]:
# Save only the model's parameters (state_dict), not the whole module object.
torch.save(model.state_dict(), 'C:/Users/erics/Documents/Programme/Bachelorarbeit/models/BVAE_Pong/B=5-5-5_0_1_DataBetadecreaseAPR12')

Resource utilization during training: GPU Copy ~22%, VRAM 100%, 3D 0%; CPU ~25%.