In [2]:
# Third-party packages and modules:
from collections import deque
from datetime import datetime
import numpy as np
import gym, os, json
# My packages and modules:
import utils
from PIL import Image
import copy
import gzip
import pickle
import os
from datetime import datetime
import my_neural_network as mnn
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()


def read_data(data_file='./data/data_02new.pkl.gzip'):
    """Load recorded expert states and actions from a gzip-compressed pickle.

    The original note says these recordings are produced by drive_manually.py.

    Args:
        data_file: Path of the gzip-compressed pickle to read. It must contain
            a dict with the keys "state" and "action". The default is the
            recording this notebook has been using; pass another path
            (e.g. './data/data_new.pkl.gzip') to read a different recording.

    Returns:
        Tuple (X, y): the "state" and "action" entries, each combined with
        utils.vstack.
    """
    print("Reading data")
    # NOTE: unpickling can execute arbitrary code. Only open recordings that
    # were produced by this project.
    with gzip.open(data_file, 'rb') as f:
        data = pickle.load(f)
    print("data extracted")
    X = utils.vstack(data["state"])
    print("X ready")
    y = utils.vstack(data["action"])
    print("y ready")
    return X, y

def preprocess_data(X, y, hist_len, shuffle):
    """Prepare expert states and actions for the agent.

    The actions are validated and translated with the `utils` helpers, the
    states are preprocessed, and both are finally passed through
    utils.stack_history together with `hist_len` and `shuffle`.

    Returns:
        Tuple (states, actions) as produced by utils.stack_history.
    """
    print('Preprocessing states. Shape:', X.shape)

    # Validate the raw environment actions before translating them.
    utils.check_invalid_actions(y)
    agent_actions = utils.transl_action_env2agent(y)
    agent_states = utils.preprocess_state(X)

    stacked_states, stacked_actions = utils.stack_history(
        agent_states, agent_actions, hist_len, shuffle=shuffle
    )
    return stacked_states, stacked_actions

def split_data(X, y, frac = 0.1):
    """Cut the dataset into a leading training part and a trailing validation part.

    Args:
        X: Sliceable sequence of inputs.
        y: Sliceable sequence of targets; its length determines the cut point.
        frac: Fraction of the samples (taken from the end) used for validation.

    Returns:
        Tuple (X_train, y_train, X_valid, y_valid).
    """
    n_train = int((1-frac) * len(y))
    train_part = slice(None, n_train)
    valid_part = slice(n_train, None)
    return X[train_part], y[train_part], X[valid_part], y[valid_part]

def plot_states(x_pp, X_tr=None, n=3):
    """Plot some random states before and after preprocessing.

    Each of the `n` rows shows one randomly picked sample: the raw state from
    `X_tr` on the left (only when `X_tr` is given) and the preprocessed state
    from `x_pp` on the right.

    Args:
        x_pp: Preprocessed states; indexed with the same index as `X_tr`, so
            both must be aligned sample by sample.
        X_tr: Optional raw states; values are divided by 255 before display.
        n: Number of samples (rows) to draw.
    """
    # `plt` is not imported anywhere in the visible notebook; importing it
    # here keeps the function usable after a kernel restart.
    import matplotlib.pyplot as plt

    pick = np.random.randint(0, len(x_pp), n)
    # squeeze=False keeps `axes` two-dimensional even for n == 1, where the
    # default would return a 1-D array and break the [row, column] indexing.
    fig, axes = plt.subplots(n, 2, sharex=True, sharey=True, figsize=(20,20),
                             squeeze=False)
    for i, p in enumerate(pick):
        if X_tr is not None:
            axes[i,0].imshow(X_tr[p]/255)
        axes[i,1].imshow(np.squeeze(x_pp[p]), cmap='gray')
    fig.tight_layout()
    plt.show()

def plot_action_histogram(actions, title):
    """Plot the histogram of actions from the expert dataset.

    Args:
        actions: Actions in the encoding accepted by utils.unhot, which
            converts them to action ids.
        title: Title of the figure.
    """
    # `plt` is not imported anywhere in the visible notebook; importing it
    # here keeps the function usable after a kernel restart.
    import matplotlib.pyplot as plt

    acts_id = utils.unhot(actions)
    fig, ax = plt.subplots()
    # One bin per action id, centred on the integer value. With explicit bin
    # edges the `range` argument of hist() has no effect, so the previously
    # hard-coded range=(0,6) was dropped.
    bins = np.arange(-.5, utils.n_actions + .5)
    ax.hist(acts_id, bins=bins, rwidth=.9)
    ax.set(title=title, xlim=(-.5, utils.n_actions -.5))
    plt.show()

class Agent:
    """Driving agent built around a classifier from `my_neural_network`.

    During an episode the agent keeps
      * `state_hist`: the last `utils.history_length` preprocessed frames,
        which is what the model receives as input,
      * `action_history`: the most recent actions (at most 100),
      * `overwrite_actions`: scripted actions that temporarily replace the
        network output after a freeze has been detected.

    `begin_new_episode` must be called before the first `get_action` of every
    episode, because it creates all of the per-episode attributes.
    """

    # Constructor is "overloaded" by the classmethods below.
    def __init__(self, model):
        # The neural network:
        self.model = model
        # Constant action used for the dead start and for freeze recovery.
        # Index 2 is the brake (see check_freeze).
        self.accelerate = np.array([0.0, 1.0, 0.0], dtype=np.float32)


    @classmethod  # Constructor for a brand new model
    def from_scratch(cls, n_channels):
        """Build an agent with a freshly initialised network.

        Args:
            n_channels: Number of channels of the 96x96 model input.
        """
        layers = [
            mnn.layers.Input(input_shape=[96, 96, n_channels]),
            mnn.layers.Conv2d(filters=16, kernel_size=5, stride=4),
            mnn.layers.ReLU(),
            mnn.layers.Dropout(drop_probability=0.5),
            mnn.layers.Conv2d(filters=32, kernel_size=3, stride=2),
            mnn.layers.ReLU(),
            mnn.layers.Dropout(drop_probability=0.5),
            mnn.layers.Flatten(),
            mnn.layers.Linear(n_units=128),
            mnn.layers.Linear(n_units=utils.n_actions),
        ]
        model = mnn.models.Classifier_From_Layers(layers)
        # cls(...) instead of Agent(...) so subclasses get their own type back.
        return cls(model)

    @classmethod  # Constructor to load a model from a file
    def from_file(cls, file_name):
        """Build an agent from a previously saved model.

        NOTE(review): `file_name` is currently NOT used; the model is always
        restored from 'saved_models/'. Callers in this notebook pass values
        such as 'sample/' and 'saved_model.pb' that only work because the
        argument is ignored, so the behaviour is kept as is. Confirm the
        intended location before wiring the argument through.
        """
        model = mnn.models.Classifier_From_File('saved_models/')
        return cls(model)

    def train(self, X_train, y_train, X_valid, y_valid, n_batches, batch_size, lr, display_step):
        """Train the wrapped model; all arguments are forwarded unchanged."""
        print("Training model")
        self.model.train(X_train, y_train, X_valid, y_valid, n_batches, batch_size, lr, display_step)

    def begin_new_episode(self, state0):
        """Reset all per-episode state and seed the frame history with `state0`."""
        # A history of the last n agent's actions
        self.action_history = deque(maxlen=100)
        # Buffer for actions that may eventually overwrite the model
        self.overwrite_actions = []
        # Number of forced dead-start actions taken so far; it is only
        # advanced while the dead start is active (see get_action).
        self.action_counter = 0
        # This data structure (kind of a deque) will always store the
        # last 'history_length' states and will be fed to the model:
        self.state_hist = np.empty((1, state0.shape[0], state0.shape[1], utils.history_length))
        # Fill every slot with the first frame so no uninitialised memory
        # from np.empty is ever fed to the model.
        for _ in range(utils.history_length):
            self.__push_state(state0)

    def __push_state(self, state):
        # Push the current state to the history.
        # Oldest state in history is discarded.
        sg = state.astype(np.float32)
        sg = np.expand_dims(sg, 0)
        sg = utils.preprocess_state(sg)
        # Shift every stored frame one slot back; slot 0 holds the newest one.
        self.state_hist[0,:,:,1:] = self.state_hist[0,:,:,:-1]
        self.state_hist[0,:,:,0] = sg[0]

    def get_action(self, env_state):
        """Return the action to apply for the environment frame `env_state`.

        Order of precedence: forced acceleration during the first
        `utils.dead_start` calls, then any pending freeze-recovery actions,
        then the prediction of the network.
        """
        # Add the current state to the state history:
        self.__push_state(env_state)

        # First actions will always be to accelerate:
        if self.action_counter < utils.dead_start:
            self.action_history.append(self.accelerate)
            self.action_counter += 1
            return self.accelerate

        # If the car is stuck for too long, the neural network is overwritten:
        if len(self.overwrite_actions) > 0:
            # pop() takes from the END of the list, so the recovery sequence
            # built by check_freeze is replayed back to front.
            action = self.overwrite_actions.pop()
            self.action_history.append(action)
            return action

        # Check if the car is frozen. On detection the recovery actions are
        # queued and used from the next call on; this call still uses the NN.
        if self.check_freeze():
            print('Freeze detected. Overwritting...')

        # Uses the NN to choose the next action:
        agent_action = self.model.predict(self.state_hist)
        agent_action = utils.transl_action_agent2env(agent_action)
        self.action_history.append(agent_action)
        return agent_action

    def check_freeze(self):
        """Detect a stuck car and queue recovery actions.

        The car counts as stuck when every action in `action_history` is the
        same and that action is not `accelerate`. In that case
        `overwrite_actions` is filled with two cycles of 10x the repeated
        action with the brake released followed by 10x `accelerate`.

        Returns:
            True when a freeze was detected, otherwise False. An empty
            history is never a freeze.
        """
        # Nothing recorded yet (e.g. utils.dead_start == 0 on the very first
        # call): indexing the empty deque would raise IndexError.
        if not self.action_history:
            return False

        first = self.action_history[0]
        if np.all(first == self.accelerate):
            return False
        for past_action in self.action_history:
            if not np.all(past_action == first):
                return False

        # If the code reaches this point, the car is stuck.
        # Work on a copy so the entry stored in the history is not modified.
        released = np.array(first, copy=True)
        released[2] = 0.0  # release brake
        overwrite_cycles = 2
        one_cycle = 10 * [released] + 10 * [self.accelerate]
        self.overwrite_actions = overwrite_cycles * one_cycle
        return True

    def save(self, file_name):
        """Save the model to `file_name`; the model is asked to close its session."""
        self.model.save(file_name, close_session=True)
    
def save_performance_results(episode_rewards, directory):
    """Write the episode rewards and their mean/std to a time-stamped JSON file.

    Args:
        episode_rewards: Sequence with the total reward of each episode.
        directory: Folder for the result file. It is created when missing and
            may be given with or without a trailing path separator; an empty
            string means the current working directory.

    Returns:
        Path of the JSON file that was written.
    """
    rewards = np.asarray(episode_rewards, dtype=float)
    # Plain Python floats: json cannot serialise every NumPy scalar type.
    results = {
        "episode_rewards": rewards.tolist(),
        "mean": float(rewards.mean()),
        "std": float(rewards.std()),
    }

    if directory:
        os.makedirs(directory, exist_ok=True)

    time_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    # os.path.join instead of string concatenation, so a directory without a
    # trailing slash no longer ends up glued to the file name.
    fname = os.path.join(directory, f"results_bc_agent-{time_stamp}.json")
    with open(fname, "w") as fh:
        json.dump(results, fh)
    return fname


2023-02-08 22:44:47.324399: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
def store_data(data, datasets_dir="./data", file_name='data_02.pkl.gzip'):
    """Append recorded samples to a gzip-compressed pickle, creating it if needed.

    Args:
        data: Dict mapping a sample kind (e.g. "state", "action") to a list
            of recorded values.
        datasets_dir: Folder that holds the data file; created when missing.
        file_name: Name of the data file inside `datasets_dir`.

    When the file already exists, every list in `data` is appended to the
    list stored under the same key, so all recorded lists keep the same
    length. Previously only "state" and "action" were appended and the other
    lists fell out of sync. `data` itself is not modified.
    """
    data_file = os.path.join(datasets_dir, file_name)

    if os.path.exists(data_file):
        # load existing data
        # NOTE: unpickling can execute arbitrary code. Only point this at
        # files written by this project.
        with gzip.open(data_file, 'rb') as f:
            combined = pickle.load(f)
        # append new data
        for key, values in data.items():
            combined.setdefault(key, []).extend(values)
    else:
        # makedirs also creates missing parent folders, and exist_ok avoids
        # an error if the folder appears in the meantime.
        if datasets_dir:
            os.makedirs(datasets_dir, exist_ok=True)
        combined = data

    # save combined data
    with gzip.open(data_file, 'wb') as f:
        pickle.dump(combined, f)



In [4]:
def measure_grass_percentage_in_region(image, region, threshold):
    """Measure how much of a region is grass and return an annotated copy.

    Args:
        image: Array indexed as image[row, column, channel].
        region: Tuple (x1, y1, x2, y2); the pixels image[y1:y2, x1:x2] are
            inspected.
        threshold: Fraction of grass pixels at or above which a warning is
            printed.

    Returns:
        A COPY of `image` with a white rectangle drawn around the region.
        The input array is left untouched, so frames that were already
        recorded are not overwritten by the debug overlay.
    """
    # Unpack the region coordinates
    x1, y1, x2, y2 = region

    # Select the region from the image
    selected_region = image[y1:y2, x1:x2]

    # The two grass colors to look for
    grass_colors = [(102., 229., 102.), (102., 204., 102.)]

    # A pixel counts when all of its channels match one of the grass colors.
    grass_pixels = sum(
        int(np.count_nonzero(np.all(selected_region == color, axis=-1)))
        for color in grass_colors
    )

    # Fraction of grass pixels; an empty region counts as 0 instead of 0/0.
    n_pixels = selected_region.shape[0] * selected_region.shape[1]
    color_percentage = grass_pixels / n_pixels if n_pixels else 0.0

    # Check if the color percentage exceeds the threshold
    if color_percentage >= threshold:
        print("Warning: color percentage exceeds threshold")

    # Draw a white rectangle around the selected region, on a copy.
    annotated = np.array(image, copy=True)
    height, width = annotated.shape[:2]
    # x2 / y2 are exclusive bounds and may equal the image size; clamp the
    # far edges so a region touching the border does not raise IndexError.
    right = min(x2, width - 1)
    bottom = min(y2, height - 1)
    white = [255, 255, 255]
    annotated[y1:y2, x1, :] = white
    annotated[y1:y2, right, :] = white
    annotated[y1, x1:x2, :] = white
    annotated[bottom, x1:x2, :] = white

    # Return the annotated copy
    return annotated



In [None]:
    ###################TEST AND DATA COLLECTION UNIT#################
    # Drives the car with a previously saved agent for `n_test_episodes`
    # episodes, records every transition, and stores the recording of each
    # episode whose total reward is above `good_reward_threshold`.

    n_test_episodes = 1
    max_timesteps = 1000
    good_reward_threshold = 1500
    # (x1, y1, x2, y2) window of the frame that is checked for grass pixels.
    #region = (20,50,76,81)
    region = (30,50,66,81)
    sample_keys = ("state", "next_state", "reward", "action", "terminal")

    # Initialize environment and agent:
    #agent = Agent.from_file('saved_models/')
    agent = Agent.from_file('sample/')

    # The environment is reset at the start of every episode below, so no
    # extra reset (and extra track generation) is done here.
    env = gym.make('CarRacing-v0').unwrapped

    episode_rewards = []  # total reward of each episode
    good_expert = 0       # number of episodes above the reward threshold
    good_samples = {key: [] for key in sample_keys}
    image = None          # last frame annotated with the grass region

    try:
        # Episode loop
        for i in range(n_test_episodes):
            episode_samples = {key: [] for key in sample_keys}
            episode_reward = 0

            state = env.reset()
            agent.begin_new_episode(state0=state)

            # State loop
            for _ in range(max_timesteps):
                # Request action from agent:
                agent_action = agent.get_action(state)
                # Given this action, get the next environment state and reward:
                next_state, r, done, info = env.step(agent_action)
                episode_reward += r

                episode_samples["state"].append(state)
                episode_samples["action"].append(np.array(agent_action))
                episode_samples["next_state"].append(next_state)
                episode_samples["reward"].append(r)
                episode_samples["terminal"].append(done)

                # The recorded lists hold references to the frames, so the
                # debug rectangle is drawn on a copy and never on the data.
                image = measure_grass_percentage_in_region(state.copy(), region, threshold=0.95)

                state = next_state
                env.render()

                if done:
                    break

            if (i+1) % 10 == 0:
                print(f'Episode {i+1}')
            episode_rewards.append(episode_reward)

            # One list entry per episode for every recorded kind of sample.
            good_samples = {key: [samples] for key, samples in episode_samples.items()}

            if episode_reward > good_reward_threshold:
                good_expert += 1
                print('GOOD EXPERT... saving data***************************************')
                store_data(good_samples, "./data")
    finally:
        # Close the environment even when the run is interrupted or fails.
        env.close()

    # save reward statistics in a .json file
    #save_performance_results(episode_rewards, 'performance_results/')
    if image is not None:
        pil_image = Image.fromarray(image)
        pil_image.show()
    print('... finished')
    if episode_rewards:
        average = sum(episode_rewards) / len(episode_rewards)
        print(f"-------------AVERAGE REWARD: {average}")
    print(f"-------------No.GOOD EXPERT: {good_expert} out of {n_test_episodes} experts")
    X, y = read_data()
    print(X.shape)
    print(y.shape)

    

2023-02-08 22:44:49.964304: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2023-02-08 22:44:49.994327: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-08 22:44:49.994722: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3080 computeCapability: 8.6
coreClock: 1.71GHz coreCount: 68 deviceMemorySize: 9.75GiB deviceMemoryBandwidth: 707.88GiB/s
2023-02-08 22:44:49.994813: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2023-02-08 22:44:50.035879: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-02-08 22:44:50.036064: I tensorflow/stream_executor/plat

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from saved_models/variables/variables


2023-02-08 22:44:51.292353: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1258] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-02-08 22:44:51.292387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1264]      0 
2023-02-08 22:44:51.292397: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1277] 0:   N 
2023-02-08 22:44:51.292611: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-08 22:44:51.292845: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-08 22:44:51.293047: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-

Track generation: 1059..1327 -> 268-tiles track
0.0
Track generation: 1104..1384 -> 280-tiles track
0.0
2.1912044564876854
2.1912044564876854
3.3981070073163964
3.3981070073163964
4.357080705409102
4.357080705409102
5.253588618943616
5.253588618943616
6.134358976784904
6.134358976784904
7.011166722668812
7.011166722668812
7.886975577639331
7.886975577639331
8.762532299232232
8.762532299232232
9.63802484694836
9.63802484694836
10.513503688676819
10.513503688676819
11.388975387342981
11.388975387342981
12.264447545725693
12.264447545725693
13.139921158820448
13.139921158820448
14.015392445003354
14.015392445003354
14.890864235466262
14.890864235466262
15.766333738674502
15.766333738674502
16.641804765980538
16.641804765980538
17.517279158542443
17.517279158542443
18.392751353144618
18.392751353144618
19.268223111546686
19.268223111546686
20.143694423835267
20.143694423835267
21.019165389260213
21.019165389260213
21.894634037631004
21.894634037631004
22.77010988253073
22.77010988253073
23

2023-02-08 22:44:52.872682: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudnn.so.8
2023-02-08 22:44:54.008520: I tensorflow/stream_executor/cuda/cuda_dnn.cc:359] Loaded cuDNN version 8201
2023-02-08 22:44:56.303818: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2023-02-08 22:44:57.496851: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11
2023-02-08 22:44:57.544914: I tensorflow/stream_executor/cuda/cuda_blas.cc:1838] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


46.40783179196933
46.40783179196933
47.28331000480174
47.28331000480174
48.158776026991134
48.158776026991134
49.034255599737605
49.034255599737605
49.9097254410229
49.9097254410229
50.78519640380801
50.78519640380801
51.66067118457783
51.66067118457783
52.5361417093203
52.5361417093203
53.411614636174434
53.411614636174434
54.287085496654946
54.287085496654946
55.16256083262366
55.16256083262366
56.03803182320736
56.03803182320736
56.91350160732085
56.91350160732085
57.78897257492432
57.78897257492432
58.66445123613451
58.66445123613451
59.53992089931597
59.53992089931597
60.41538788302873
60.41538788302873
61.290862744702395
61.290862744702395
62.16633248610974
62.16633248610974
63.041803712680306
63.041803712680306
63.91727127439999
63.91727127439999
64.79275608279043
64.79275608279043
65.63925325153328
65.63925325153328
66.73332609020261
66.73332609020261
67.82875853345367
67.82875853345367
68.61540183905343
68.61540183905343
69.04103507234443
69.04103507234443
69.2349858896961
69.

In [None]:
    #############################TRAIN UNIT#######################
    # Toggles for this cell:
    balance_actions = False  # balance the action distribution before training
    show_histograms = True   # JUST FOR DEBUGGING
    show_states = False      # JUST FOR DEBUGGING; requires hist_len=1, shuffle=False

    # Read data:
    X, y = read_data()
    print(X.shape)
    print(y.shape)
    # Preprocess it:
    X_pp, y_pp = preprocess_data(X, y, hist_len=utils.history_length, shuffle=False)
    # Plot action histogram.
    if show_histograms:
        plot_action_histogram(y_pp, 'Action distribution BEFORE balancing')

    if balance_actions:
        # Balance samples. Per the original note this gets rid of 50% of the
        # most common action (accelerate).
        X_pp, y_pp = utils.balance_actions(X_pp, y_pp, 0.5)
        # Only plotted when balancing actually ran; otherwise it would just
        # repeat the histogram above under a misleading title.
        if show_histograms:
            plot_action_histogram(y_pp, 'Action distribution AFTER balancing')

    # Plot some random states before and after preprocessing.
    if show_states:
        plot_states(X_pp, X)

    # Split data into training and validation:
    X_train, y_train, X_valid, y_valid = split_data(X_pp, y_pp, frac=.1)

    # Continue training the previously saved model (this does NOT start from
    # scratch). Agent.from_file restores from 'saved_models/', the same
    # location the model is saved to below.
    agent = Agent.from_file(file_name='saved_models/')

    # Train it:
    agent.train(X_train, y_train, X_valid, y_valid, n_batches=10000, batch_size=100, lr=5e-4, display_step=100)

    # Save it to file:
    agent.save('saved_models/')

In [None]:
    # Reload the recorded dataset and report the shape of both arrays.
    X, y = read_data()
    for loaded in (X, y):
        print(loaded.shape)

In [None]:
load_ext tensorboard

In [None]:
tensorboard --logdir tensorboard_data

In [10]:
# Closes the gym environment. Relies on `env` still being defined by the
# test/data-collection cell in the same kernel session.
env.close()

In [1]:
# Shell escape: runs nvidia-smi and shows its output in the notebook.
!nvidia-smi

Thu Feb 16 23:01:02 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.05    Driver Version: 525.85.05    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
| 30%   32C    P5    55W / 320W |   3482MiB / 10240MiB |      7%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# NOTE(review): 38785 is a hard-coded process id — presumably taken from the
# nvidia-smi process list of one particular session. Verify the id before
# re-running, otherwise an unrelated process may be killed.
!kill -9 38785