In [1]:
from pyvirtualdisplay import Display
import gym
import numpy as np
import imageio
import cv2
# import cv2 
import matplotlib.pyplot as plt
import pandas as pd
import time
%matplotlib inline

# Set virtual display
# Creates a non-visible (visible=0) 1400x900 virtual display and starts it.
# NOTE(review): presumably needed so env.render(mode='rgb_array') works on a
# machine without a physical screen — confirm for your environment.
display = Display(visible=0, size=(1400, 900))
display.start()

<pyvirtualdisplay.display.Display at 0x7fa6d44207c0>

In [2]:

def show_image(rgb_array) :
    """Draw `rgb_array` on the current matplotlib axes with the axis hidden.

    Args:
        rgb_array: image data accepted by ``plt.imshow``.
    """
    # Grab the current axes (created on demand) and hide its axis lines,
    # ticks and labels before the image is drawn.
    current_axes = plt.gca()
    current_axes.axis('off')
    plt.imshow(rgb_array)


In [3]:


def render_episode(env_name, max_steps=1000, gif_path='test.gif', fps=30) :
    """Run one random-action episode, save it as an annotated GIF and return it.

    Each frame is rendered before the action is applied and is annotated with
    the reward, observation and action of the step that follows, plus a frame
    counter.

    Args:
        env_name: gym environment id passed to ``gym.make``.
        max_steps: maximum number of steps (frames) to record.
        gif_path: path the GIF is written to.
        fps: frame rate passed to ``imageio.mimsave``.

    Returns:
        IPython.display.Image holding the GIF bytes. Make the call the last
        expression of a cell (or pass the result to ``display``) to show it.
    """
    from IPython.display import Image

    # Build the environment that was actually requested (the id used to be
    # hard-coded, so the argument was silently ignored).
    env = gym.make(env_name)
    frames = []
    try:
        # reset the environment to a new, random state
        env.reset()
        for t in range(max_steps):
            # render one frame
            rgb_array = env.render(mode='rgb_array')

            # take a random action and step the environment
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            # cv2.cvtColor returns a new array, so the text is drawn on a copy
            # and not on the array handed back by env.render
            img_bgr = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
            overlay_lines = (
                'reward: {}'.format(reward),
                'obs: {}'.format(observation),
                'action: {}'.format(action),
                'frame: {} / {}'.format(t, max_steps),
            )
            # one text row every 30 px, starting at y=30
            for row, text in enumerate(overlay_lines, start=1):
                cv2.putText(img_bgr, text, (10, 30 * row), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2, cv2.LINE_AA)

            frames.append(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

            # if the episode is done, stop recording
            if done:
                break
    finally:
        # close the environment even if rendering or stepping raised
        env.close()

    # make gif
    imageio.mimsave(gif_path, frames, fps=fps)

    # Return the GIF so the notebook can display it; an Image created and
    # dropped inside a function is never shown.
    with open(gif_path, 'rb') as gif_file:
        return Image(data=gif_file.read())

#render_episode('MountainCarContinuous-v0')

In [4]:
# Collect 100K random-action transitions (default num_data=10**5)

def collect_data_random_sample(env_name,num_data=10**5) :
    """Collect `num_data` transitions from `env_name` using random actions.

    Steps the environment with actions drawn from ``env.action_space.sample()``
    and starts a new episode whenever ``done`` is returned. The transitions are
    written to ``data/<env_name>-<num_data>.csv`` and also returned.

    Args:
        env_name: gym environment id passed to ``gym.make``.
        num_data: total number of transitions to collect.

    Returns:
        pandas.DataFrame with columns ``state``, ``action``, ``reward``,
        ``done`` and ``next_state``, one row per transition. The state/action
        cells hold whatever objects the environment returned, so the CSV
        stores their string representation.
    """
    import os  # local import: only used to make sure the output directory exists

    start_time = time.time()
    env = gym.make(env_name)

    Ss = []
    S_primes = []
    As = []
    Rs = []
    ds = []

    try:
        # reset the environment to a new, random state
        S_prime = env.reset()
        for t in range(num_data):
            # No env.render() here: the frame was never used and rendering
            # every step dominated the collection time.

            # take a random action
            action = env.action_space.sample()
            # step the environment
            S = S_prime
            S_prime, reward, done, _ = env.step(action)

            Ss.append(S)
            S_primes.append(S_prime)
            As.append(action)
            Rs.append(reward)
            ds.append(done)

            if t % 1000 == 0 :
                print('collecting data: {} / {}'.format(t,num_data))

            # Episode finished: the next transition must start from the fresh
            # initial observation, not from the terminal state just stored.
            if done:
                S_prime = env.reset()
    finally:
        # close the environment even if collection was interrupted
        env.close()

    finish_time = time.time()
    print('collecting data time: {} sec'.format(finish_time-start_time))
    # save data as csv file, filename is env_name-num_data.csv
    df = pd.DataFrame({'state': Ss, 'action': As, 'reward': Rs, 'done': ds, 'next_state': S_primes})
    os.makedirs('data', exist_ok=True)
    df.to_csv('data/{}-{}.csv'.format(env_name,num_data), index=False)
    print('data saved as data/{}-{}.csv'.format(env_name,num_data))
    return df

# Collect 10**5 random-action transitions from MountainCarContinuous-v0.
# The call is the last expression of the cell, so the returned DataFrame is
# what the cell displays.
collect_data_random_sample('MountainCarContinuous-v0',num_data=10**5)

collecting data: 0 / 100000
collecting data: 1000 / 100000
collecting data: 2000 / 100000
collecting data: 3000 / 100000


KeyboardInterrupt: 