# **Homework 2**

In [2]:
import gdown
import zipfile
import os

In [3]:
#Check if files exists in data folder
if os.path.exists('data/'):
    print('Files already downloaded.')

else:
    output_path = 'data.zip'
    file_id = '1KDN-rFCq9IDJ7_kNW5y5Co100KNpklz-'
    url = f'https://drive.google.com/uc?id={file_id}'
    # Download the zip file
    gdown.download(url, output_path, quiet=False)

    # Extract the contents of the zip file
    with zipfile.ZipFile(output_path, 'r') as zip_ref:
        zip_ref.extractall('data')

    # Remove the zip file
    os.remove(output_path)


Files already downloaded.


In [4]:
if(os.path.exists('test/') and os.path.exists('train')):
    print('Files already extracted')
else:
    print('Extracting the test.zip and train.zip files...')
    # Extract the test.zip file
    with zipfile.ZipFile('data/public/test.zip', 'r') as zip_ref:
        zip_ref.extractall()

    # Extract the train.zip file
    with zipfile.ZipFile('data/public/train.zip', 'r') as zip_ref:
        zip_ref.extractall()

    print('Done!')

Files already extracted


## First Approach

For the first approach, we will use a different architecture to train our model. We will use a custom convolutional neural network (CNN) architecture.

### Step 1: Data Loading and Preprocessing

Similar to the first approach, we will load and preprocess our dataset using the same transformations. We will also create data loaders for the training and validation sets.

### Step 2: Model Architecture

In this approach, we will define a custom CNN model. The model will consist of multiple convolutional layers followed by fully connected layers. We will use ReLU activation functions and dropout regularization to prevent overfitting.

### Step 3: Training Loop

We will train the model using a similar training loop as in the first approach. We will iterate over the training set, compute the loss, perform backpropagation, and update the model's weights.

### Step 4: Model Evaluation

After training, we will evaluate the model on the validation set. We will calculate the accuracy of the model by comparing the predicted labels with the ground truth labels.

### Step 5: Save the Model

Finally, we will save the trained model to a file for future use.

## Conclusion

In this second approach, we used a custom CNN architecture to train our model. This approach allows us to have more control over the model's architecture and potentially achieve better performance. However, it requires more manual design and experimentation compared to using a pre-trained model like ResNet18.

It is important to note that the choice of architecture depends on the specific problem and dataset. It is recommended to experiment with different architectures and hyperparameters to find the best model for your task.


In [8]:
from keras.preprocessing.image import ImageDataGenerator

trainingset = 'train/'
validationset = 'test/'

batch_size = 64
input_shape = ()
train_datagen = ImageDataGenerator(
    horizontal_flip=False,\
    vertical_flip=False,
    validation_split=0.2
    )


train_generator = train_datagen.flow_from_directory(
    directory=trainingset,
    target_size=(96, 96),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    directory=validationset, # same directory as training data
    target_size=(96, 96),
    batch_size=batch_size,
    shuffle=False,
    class_mode='categorical',
    subset='validation') # set as validation data

num_samples = train_generator.n
num_classes = train_generator.num_classes
input_shape = train_generator.image_shape

classnames = [k for k,v in train_generator.class_indices.items()]
img_h=input_shape[0]
img_w=input_shape[1]
print("Image input %s" %str(input_shape))
print("Classes: %r" %classnames)
print('Loaded %d training samples from  %d classes.' %(num_samples,num_classes))
print('Loaded %d test samples from %d classes.' %(validation_generator.n,validation_generator.num_classes))


Found 5096 images belonging to 5 classes.
Found 548 images belonging to 5 classes.
Image input (96, 96, 3)
Classes: ['0', '1', '2', '3', '4']
Loaded 5096 training samples from  5 classes.
Loaded 548 test samples from 5 classes.


In [9]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten,\
                         Conv2D, MaxPooling2D
from keras.layers import BatchNormalization
from keras import regularizers
from keras import optimizers

def MyCNN(input_shape, num_classes):
    model = Sequential(name="MyOptimizedCNN")

    # C1 Convolutional Layer 
    model.add(Conv2D(filters=32, input_shape=input_shape, kernel_size=(5, 5)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # C2 Convolutional Layer
    model.add(Conv2D(filters=64, kernel_size=(5, 5)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # C3 Convolutional Layer
    model.add(Conv2D(filters=128, kernel_size=(3, 3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Pooling
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Flatten
    model.add(Flatten())

    # D1 Dense Layer
    model.add(Dense(256, kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Dropout
    model.add(Dropout(0.5))

    # D2 Dense Layer
    model.add(Dense(128, kernel_regularizer=regularizers.l2(0.01)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Dropout
    model.add(Dropout(0.5))

    # Output Layer
    model.add(Dense(num_classes, activation='softmax'))

    # Compile
    optimizer = optimizers.RMSprop(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

# create the model
model = MyCNN(input_shape, num_classes)
model.summary()

Model: "MyOptimizedCNN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 92, 92, 32)        2432      
                                                                 
 batch_normalization_5 (Bat  (None, 92, 92, 32)        128       
 chNormalization)                                                
                                                                 
 activation_5 (Activation)   (None, 92, 92, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 88, 88, 64)        51264     
                                                                 
 batch_normalization_6 (Bat  (None, 88, 88, 64)        256       
 chNormalization)                                                
                                                                 
 activation_6 (Activation)   (None, 88, 88, 64)     

In [10]:
from keras import callbacks

# Define callbacks
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6)

# Assuming you have a `train_generator` and `validation_generator` defined

# Calculate steps per epoch and validation steps
steps_per_epoch = len(train_generator)
val_steps = len(validation_generator)

try:
    # Train the model with better training parameters
    history = model.fit(
        train_generator,
        epochs=50,
        steps_per_epoch=steps_per_epoch,
        validation_data=validation_generator,
        validation_steps=val_steps,
        callbacks=[early_stopping, reduce_lr]
    )

except KeyboardInterrupt:
    pass


Epoch 1/50
Epoch 2/50

In [7]:
val_steps=validation_generator.n//validation_generator.batch_size+1
loss, acc = model.evaluate_generator(validation_generator,steps=val_steps)
print('Test loss: %f' %loss)
print('Test accuracy: %f' %acc)

Epoch 1/50, Loss: 1.5625077855587006
Epoch 2/50, Loss: 1.4892176461219788
Epoch 3/50, Loss: 1.4617905294895173
Epoch 4/50, Loss: 1.4445521676540374
Epoch 5/50, Loss: 1.4318831050395966
Epoch 6/50, Loss: 1.4116279458999634
Epoch 7/50, Loss: 1.403861232995987
Epoch 8/50, Loss: 1.3920652484893798
Epoch 9/50, Loss: 1.3829091846942902
Epoch 10/50, Loss: 1.374531031847
Epoch 11/50, Loss: 1.3707058930397034
Epoch 12/50, Loss: 1.3652581417560576
Epoch 13/50, Loss: 1.3584611332416534
Epoch 14/50, Loss: 1.3579312932491303
Epoch 15/50, Loss: 1.3555889868736266
Epoch 16/50, Loss: 1.3486079335212708
Epoch 17/50, Loss: 1.3492146551609039
Epoch 18/50, Loss: 1.3507735419273377
Epoch 19/50, Loss: 1.348209935426712
Epoch 20/50, Loss: 1.3452800679206849
Epoch 21/50, Loss: 1.3465514647960664
Epoch 22/50, Loss: 1.3436218583583832
Epoch 23/50, Loss: 1.339033020734787
Epoch 24/50, Loss: 1.340093891620636
Epoch 25/50, Loss: 1.338769142627716
Epoch 26/50, Loss: 1.3347801744937897
Epoch 27/50, Loss: 1.335055662

In [9]:
import sklearn.metrics 
from sklearn.metrics import classification_report, confusion_matrix
 
preds = model.predict_generator(validation_generator,steps=val_steps)

Ypred = np.argmax(preds, axis=1)
Ytest = validation_generator.classes  # shuffle=False in test_generator

print(classification_report(Ytest, Ypred, labels=None, target_names=classnames, digits=3))

Validation Accuracy: 0.6867951982539106


In [None]:
import sklearn.metrics 
from sklearn.metrics import classification_report, confusion_matrix

preds = model.predict_generator(validation_generator,verbose=1,steps=val_steps)

Ypred = np.argmax(preds, axis=1)
Ytest = validation_generator.classes  # shuffle=False in test_generator

cm = confusion_matrix(Ytest, Ypred)

conf = [] # data structure for confusions: list of (i,j,cm[i][j])
for i in range(0,cm.shape[0]):
  for j in range(0,cm.shape[1]):
    if (i!=j and cm[i][j]>0):
      conf.append([i,j,cm[i][j]])

col=2
conf = np.array(conf)
conf = conf[np.argsort(-conf[:,col])]  # decreasing order by 3-rd column (i.e., cm[i][j])

print('%-16s     %-16s  \t%s \t%s ' %('True','Predicted','errors','err %'))
print('------------------------------------------------------------------')
for k in conf:
  print('%-16s ->  %-16s  \t%d \t%.2f %% ' %(classnames[k[0]],classnames[k[1]],k[2],k[2]*100.0/validation_generator.n))
  

In [None]:
import matplotlib.pyplot as plt

def plot_history(history,name):

    # summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title(name + ' accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(name + ' loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
        
#name="SvevaNet"
#plot_history(history, name)
name="transferLearning"
plot_history(history_transfer, name)

In [13]:
import sys
import numpy as np

try:
    import gymnasium as gym
except ModuleNotFoundError:
    print('gymnasium module not found. Try to install with')
    print('pip install gymnasium[box2d]')
    sys.exit(1)


def play(env, model):

    seed = 2000
    obs, _ = env.reset(seed=seed)
    
    # drop initial frames
    action0 = 0
    for i in range(50):
        obs,_,_,_,_ = env.step(action0)
    
    done = False
    while not done:
        p = model(obs) # adapt to your model
        action = np.argmax(p)  # adapt to your model
        obs, _, terminated, truncated, _ = env.step(action)
        done = terminated or truncated




env_arguments = {
    'domain_randomize': False,
    'continuous': False,
    'render_mode': 'human'
}

env_name = 'CarRacing-v2'
env = gym.make(env_name, **env_arguments)

print("Environment:", env_name)
print("Action space:", env.action_space)
print("Observation space:", env.observation_space)

play(env, model)

Environment: CarRacing-v2
Action space: Discrete(5)
Observation space: Box(0, 255, (96, 96, 3), uint8)


TypeError: flatten() takes from 0 to 1 positional arguments but 2 were given

## Second Approach: Deep Reinforcement Learning (DRL)

In this approach, we will utilize Deep Reinforcement Learning (DRL) techniques to solve our problem. DRL combines the power of deep neural networks with reinforcement learning algorithms to learn optimal policies in complex environments.

### Step 1: Environment Setup

First, we need to define our environment. This includes selecting an appropriate gym environment or creating a custom environment that suits our problem. The environment should provide observations, actions, and rewards.

### Step 2: Agent Design

Next, we design our DRL agent. The agent consists of a deep neural network, often referred to as the Q-network, which takes observations as input and outputs action values for each possible action. We can use popular deep learning frameworks like PyTorch or TensorFlow to implement the Q-network.

### Step 3: Training Loop

The training loop involves the following steps:

1. Initialize the Q-network with random weights.
2. Observe the current state from the environment.
3. Select an action using an exploration-exploitation strategy, such as epsilon-greedy or softmax.
4. Execute the selected action in the environment and observe the next state and reward.
5. Update the Q-network using the observed state, action, next state, and reward.
6. Repeat steps 2-5 until convergence or a maximum number of iterations.

During training, we can use techniques like experience replay and target networks to stabilize and improve the learning process.

### Step 4: Evaluation

After training, we evaluate the performance of our agent by running it in the environment and measuring its performance metrics, such as average reward or success rate. This helps us assess the effectiveness of our DRL approach.

### Step 5: Fine-tuning and Optimization

Based on the evaluation results, we can fine-tune and optimize our DRL approach. This may involve adjusting hyperparameters, modifying the network architecture, or trying different exploration-exploitation strategies.

### Conclusion

Deep Reinforcement Learning (DRL) offers a powerful approach to solving complex problems by combining deep neural networks with reinforcement learning algorithms. By following the steps outlined above, we can develop and train a DRL agent to learn optimal policies in our environment. However, it is important to note that DRL can be computationally intensive and may require significant computational resources and time for training.

In [5]:
class DQN(nn.Module):
    def __init__(self, n_frames, n_actions, h_dimension):
        super(DQN, self).__init__()

        # CNN
        self.layers_cnn = nn.Sequential(
            nn.Conv2d(n_frames, 6, kernel_size=(7, 7), stride=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2)),
            nn.Conv2d(6, 12, kernel_size=(4, 4)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2)),
            nn.Flatten(),
            nn.Linear(432, h_dimension),
            nn.ReLU(),
            nn.Linear(h_dimension, n_actions)
        )

    def forward(self, x):
        o = self.layers_cnn(x)  # (BS, ACTIONS)
        return o

In [6]:
import random
from collections import deque
import numpy as np

class DQNAgent:
    def __init__(self,
                 action_space,
                 epsilon=1.0,
                 gamma=0.95,
                 epsilon_min=0.1,
                 epsilon_decay=0.9999,
                 lr=1e-3,
                 memory_len=5000,
                 frames=3,
                 hidden_dimension=None,
                 device=None):

        self.device = device
        self.epsilon = epsilon
        self.gamma = gamma
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.memory_len = memory_len
        self.lr = lr
        self.memory = deque(maxlen=self.memory_len)
        self.action_space = action_space

        self.target_model = DQN(frames, len(self.action_space), hidden_dimension).to(self.device)
        self.model =        DQN(frames, len(self.action_space), hidden_dimension).to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)

    def update_target_model(self):
        self.target_model.load_state_dict(self.model.state_dict())

    def is_explore(self):
        flip = np.random.rand() <= self.epsilon
        return flip

    def act(self, state, is_only_random=False, is_only_exploit=False):
        if not is_only_exploit and self.is_explore() or is_only_random:
            action_index = np.random.randint(len(self.action_space))
            # print(action_index, self.ACTION_SPACE[action_index])
        else:
            q_values = self.target_model(state)[0]
            action_index = torch.argmax(q_values)
            # print("predicted action", action_index)
        return self.action_space[action_index]

    def memorize(self, state, action, reward, next_state, done):
        self.memory.append((state, self.action_space.index(action), reward, next_state, done))

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        train_state = []
        train_target = []

        for state, action_index, reward, next_state, done in minibatch:
            # state = torch.Tensor(state)
            target = self.model(state)[0]
            train_state.append(target)

            target_copy = target.detach().clone().to(self.device)
            if done:
                target_copy[action_index] = reward
            else:
                t = self.target_model(next_state)[0]
                target_copy[action_index] = reward + self.gamma * torch.max(t)
            train_target.append(target_copy)

        # Actual training
        criterion = nn.MSELoss()
        pred, tru = torch.stack(train_state), torch.stack(train_target)
        loss = criterion(pred, tru)

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load_model(self, name):
        self.model = torch.load(name)
        self.target_model = torch.load(name)
        self.model.eval()

    def save_model(self, name):
        torch.save(self.target_model, name)

In [7]:
class Config:

    SEED = 1

    STARTING_EPISODE_TRAIN = 0
    ENDING_EPISODE_TRAIN = STARTING_EPISODE_TRAIN + 1000

    STARTING_EPISODE_TEST = ENDING_EPISODE_TRAIN + 1
    ENDING_EPISODE_TEST = STARTING_EPISODE_TEST + 100

    SKIP_FRAMES = 2
    TRAINING_BATCH_SIZE = 64
    UPDATE_TARGET_MODEL_FREQUENCY = 5
    N_FRAMES = 3
    HIDDEN_DIMENSION_FC = 150

    GAS_WEIGHT = 1.3

    ACTION_SPACE = [
        (-1, 1, 0.2), (0, 1, 0.2), (1, 1, 0.2),  # .  Action Space Structure
        (-1, 1, 0), (0, 1, 0), (1, 1, 0),        # (Steering Wheel, Gas, Break)
        (-1, 0, 0.2), (0, 0, 0.2), (1, 0, 0.2),  # .  -1~1     0~1        0~1
        (-1, 0, 0), (0, 0, 0), (1, 0, 0)
    ]


In [8]:

import os
from datetime import datetime
import cv2
import torch
import matplotlib.pyplot as plt
import random
import numpy as np

import json


def write_json_to_file(data, file_path):
    """
    Write JSON data to a file.

    Parameters:
    - data: A dictionary representing the JSON data.
    - file_path: The path where the JSON file will be written.
    """
    try:
        with open(file_path, 'w') as json_file:
            json.dump(data, json_file, indent=4)
        print(f"JSON data successfully written to {file_path}")
    except Exception as e:
        print(f"Error writing JSON data to {file_path}: {e}")


def read_json_from_file(file_path):
    """
    Read JSON data from a file.

    Parameters:
    - file_path: The path of the JSON file to be read.

    Returns:
    - A dictionary representing the JSON data.
    - If there is an error reading the file, returns None.
    """
    try:
        with open(file_path, 'r') as json_file:
            data = json.load(json_file)
        print(f"JSON data successfully read from {file_path}")
        return data
    except Exception as e:
        print(f"Error reading JSON data from {file_path}: {e}")
        return None


def make_all_paths(is_dynamic_root=True, dir_name="rl_class"):
    ROOT = "data"

    if is_dynamic_root:
        date_str = datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
        dir_name = "rl_class_{}".format(date_str)
    else:
        dir_name = dir_name

    path_root = ROOT + "/" + dir_name + "/"
    dirs = ["models", "plots", "videos"]
    for d in dirs:
        path = path_root + d
        if not os.path.exists(path):
            os.makedirs(path)
        print(">> Created dir", path)
    return path_root


def plot_state_car(data, title=None):
    assert len(data.shape) == 3, "Can only handle 3D mats."
    assert data.shape[0] < 10, "Too many states to plot. Adjust the plots position first."

    # Create a figure with three subplots
    fig, axs = plt.subplots(1, data.shape[0], figsize=(10, 4))

    # Plot each image using imshow()
    for i in range(data.shape[0]):
        axs[i].imshow(data[i], cmap='gray')  # You can adjust the colormap if needed
        axs[i].axis('off')                   # Turn off axis labels

    plt.title(title)
    plt.show()


def plot_frame_car(data, title=None):
    plt.imshow(data, cmap="gray")  # You can adjust the colormap if needed
    plt.axis('off')  # Turn off axis labels
    plt.title(title)
    plt.show()


def preprocess_frame_car(frame):
    def crop(frame):
        # Crop to 84x84
        return frame[:-12, 6:-6]

    def make_img_gray(frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        return frame

    def normalize(frame):
        return frame / 255.0

    # frame = crop(frame)
    frame = make_img_gray(frame)
    frame = frame.astype(float)
    frame = normalize(frame)
    # frame = frame * 2 - 1   # maps [0,1] to [-1,1]
    return frame


def seed_everything(seed=42):
    # Set seed for Python random module
    random.seed(seed)

    # Set seed for NumPy
    np.random.seed(seed)

    # Set seed for PyTorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # disable if deterministic mode is desired


In [9]:

#import cv2   # open cv
import torch
from matplotlib import pyplot as plt
import gymnasium as gym
from collections import deque
import numpy as np
from gymnasium.wrappers import RecordVideo


def train_car_racing():
    seed_everything(seed=Config.SEED)
    PATH_ROOT = make_all_paths(is_dynamic_root=True)
    write_json_to_file(dict(Config.__dict__), file_path=PATH_ROOT + "config.json")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('>> Using device:', device)

    agent = DQNAgent(frames=Config.N_FRAMES, action_space=Config.ACTION_SPACE, device=device,
                     hidden_dimension=Config.HIDDEN_DIMENSION_FC)

    # https://www.gymlibrary.dev/environments/box2d/car_racing/
    env = gym.make('CarRacing-v2', render_mode="rgb_array")  # , render_mode='human')
    env = RecordVideo(env, PATH_ROOT + 'videos', episode_trigger=lambda x: x % Config.UPDATE_TARGET_MODEL_FREQUENCY == 0)

    epi_total_rewards = []
    for e in range(Config.STARTING_EPISODE_TRAIN, Config.ENDING_EPISODE_TRAIN + 1):
        env.episode_id = e

        epi_total_reward = 0
        epi_negative_reward_counter = 0
        epi_time_frame_counter = 1
        epi_done = False

        init_state = env.reset(seed=e)[0]  # 96, 96, 3 pixels image RGB
        init_state = preprocess_frame_car(init_state)  # 96, 96 pixels image GRAY

        # (1) EVALUATE STATE: S
        state_queue = deque([init_state] * Config.N_FRAMES, maxlen=Config.N_FRAMES)
        # plot_state_car(np.array(state_queue))  # visualize S0

        while True:
            state_tensor = torch.Tensor(np.array(state_queue)).unsqueeze(0).to(device)
            action = agent.act(state_tensor)

            # (2) EXECUTE ACTION (for several steps)
            # (3) EVALUATE S' STATE, REWARD
            reward = 0
            for _ in range(Config.SKIP_FRAMES):
                # execute action
                next_state, r, epi_done, _, _ = env.step(action)
                # plot_frame_car(next_state)
                reward += r
                if epi_done:
                    break

            # (4) ADJUST REWARD
            # if getting negative reward 10 times after the tolerance steps, terminate this episode
            if epi_time_frame_counter > 100 and reward < 0:
                epi_negative_reward_counter += 1
            else:
                epi_negative_reward_counter = 0

            # extra bonus for the model if it uses full gas
            if action[1] == 1 and action[2] == 0:
                reward *= Config.GAS_WEIGHT

            epi_total_reward += reward

            # plot_state_car(np.array(state_queue), title="STATE 0")
            # process state S'
            next_state = preprocess_frame_car(next_state)
            next_state_queue = deque([frame for frame in state_queue], maxlen=Config.N_FRAMES)
            next_state_queue.append(next_state)
            # plot_state_car(np.array(next_state_queue), title="STATE 1")

            next_state_tensor = torch.Tensor(np.array(next_state_queue)).unsqueeze(0).to(device)

            # (5) STORE OBSERVATIONS
            # Memorizing saving state, action reward tuples
            agent.memorize(state_tensor, action, reward, next_state_tensor, epi_done)

            # S = S'
            state_queue = next_state_queue

            # early stop if the number of
            if epi_negative_reward_counter >= 25 or epi_total_reward < 0:
                break

            # (6) TRAIN ON BATCHES OF OBSERVATIONS
            # train the model with tuple, if there are enough tuples
            if len(agent.memory) > Config.TRAINING_BATCH_SIZE:
                agent.replay(Config.TRAINING_BATCH_SIZE)

            epi_time_frame_counter += 1
        epi_total_rewards += [epi_total_reward]

        # >>> ON EPISODE END
        # print stats
        stats_string = 'Episode: {}/{}, Scores(Time Frames): {}, Total Rewards: {:.2}, Epsilon: {:.2}'
        print(stats_string.format(
            e,
            Config.ENDING_EPISODE_TRAIN,
            epi_time_frame_counter,
            float(epi_total_reward),
            float(agent.epsilon))
        )

        if e % Config.UPDATE_TARGET_MODEL_FREQUENCY == 0:
            # plot rewards stats
            plt.plot(epi_total_rewards, label="cum rew", color="blue")
            plt.title("Rewards during episode episode")
            plt.savefig(PATH_ROOT + 'plots/reward_{}.pdf'.format(e))

            # save model frequently
            agent.save_model(PATH_ROOT + 'models/trial_{}.h5'.format(e))

            # swap model
            agent.update_target_model()
            write_json_to_file({"CUM_REW": epi_total_rewards}, PATH_ROOT + "/stats.json")

    env.close()


#train_car_racing()