In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import time # delay within program
from math import *
import random
import pickle
import os
import json

import nidaqmx # laser output
from pyueye import ueye
from pypyueye import Camera

from improc import *
from worm_env import *

# Benchmarking
This includes random actions, a deterministic policy, and simple reward averaging.

## Random actions
#### Program flow:
* Initializes environment.
* Collects background and sets target angle
* Runs 4 episodes for 10 minutes (600 s) each:
    * While episode is going:
        * Randomly picks action
        * Takes one step
        * Adds results to a trajectory dictionary
    * Prints number of steps taken during episode
    * Saves entire trajectory to .json file as dictionary
* Closes environment.

In [2]:
# Random-action benchmark.
# Save track, total reward history, observations, endpoints, images, action history
# (whatever add_to_traj accumulates from the `info` dict returned by env.step).
fbase = 'Random_i0_'
env = ProcessedWorm(0,ep_len=600)
eps_per_worm = 4

WORM_NUMBER = 0

# try/finally so the environment is closed even if an episode raises part-way.
try:
    # Episode indices run from WORM_NUMBER*eps_per_worm upward; the index is
    # used both in the output file name and to pick the target angle.
    for i_episode in np.arange(eps_per_worm)+WORM_NUMBER*eps_per_worm:
        done = False
        fname = fbase+str(i_episode)+'.json'
        trajectory = {}

        # Target cycles through 0, 90, 180, 270 as the episode index increases.
        obs = env.reset(target=(i_episode*90)%360)

        while not done:
            action = env.action_space.sample()
            obs, r, done, info = env.step(action)
            # '\r' with end='' keeps the status on one line instead of scrolling.
            print(f'Body and head: {obs} \t\t\r',end='')

            # Combining trajectory data in info with previous steps
            add_to_traj(trajectory,info)

        print("Episode finished after {} timesteps".format(info['t']))

        # json.dump writes str, so the file must be opened in text mode
        # ('wb' raises TypeError on the first write).
        # NOTE(review): assumes add_to_traj stores only JSON-serializable
        # values (e.g. lists rather than numpy arrays) - confirm.
        with open(fname,'w') as f:
            json.dump(trajectory,f)
finally:
    env.close()

Episode finished after 201 timesteps
Episode finished after 201 timesteps
No worm 		

## Deterministic policy
Same flow as random action above but picks actions according to known AIY policy.

In [73]:
def det_policy(obs):
    """Pick an action from an observation using the known deterministic policy.

    Parameters
    ----------
    obs : sequence of two numbers
        Unpacked as ``(body_dir, head_body)``.

    Returns
    -------
    int
        0 when the first entry of ``obs`` is NaN.
        1 when ``body_dir`` and ``head_body`` have opposite signs
        (their product is negative) or when both are exactly zero.
        0 in every other case.
    """
    # Invalid observation: fall back to action 0.
    if np.isnan(obs[0]):
        return 0

    body_dir, head_body = obs

    # Opposite signs -> action 1.
    if body_dir*head_body < 0:
        return 1

    # Product is non-negative (or NaN): only the all-zero case gives action 1.
    return 1 if (body_dir == 0 and head_body == 0) else 0

In [None]:
# Deterministic-policy benchmark: same loop as the random-action run, but the
# next action is chosen by det_policy from the latest observation.
fbase = 'Known_i0_'
env = ProcessedWorm(0,ep_len=600)
eps_per_worm = 4

WORM_NUMBER = 0

# try/finally so the environment is closed even if an episode raises part-way.
try:
    # Episode indices run from WORM_NUMBER*eps_per_worm upward; the index is
    # used both in the output file name and to pick the target angle.
    for i_episode in np.arange(eps_per_worm)+WORM_NUMBER*eps_per_worm:
        done = False
        fname = fbase+str(i_episode)+'.json'
        trajectory = {}
        # First step of every episode uses action 0; the policy takes over
        # once an observation from env.step is available.
        action = 0

        # Target cycles through 0, 90, 180, 270 as the episode index increases.
        obs = env.reset(target=(i_episode*90)%360)

        while not done:
            obs, r, done, info = env.step(action)
            # '\r' with end='' keeps the status on one line instead of scrolling.
            print(f'Body and head: {obs} \t\t\r',end='')

            # Combining trajectory data in info with previous steps
            add_to_traj(trajectory,info)

            # Chooses action for next step
            action = det_policy(obs)

        print("Episode finished after {} timesteps".format(info['t']))

        # json.dump writes str, so the file must be opened in text mode
        # ('wb' raises TypeError on the first write).
        # NOTE(review): assumes add_to_traj stores only JSON-serializable
        # values (e.g. lists rather than numpy arrays) - confirm.
        with open(fname,'w') as f:
            json.dump(trajectory,f)
finally:
    env.close()

## Averaging rewards
Initializes the table randomly and updates the running averages as rewards are collected.  

For every entry, the update step after its $n$-th reward $r_n$ is
$$
\bar{r}_{n}\leftarrow \bar{r}_{n-1} +\frac{1}{n}(r_n-\bar{r}_{n-1})
$$

Separate averages $\bar{r}$ are maintained for every state-action pair. 

In [78]:
### average in progress

In [None]:
# Reward-averaging benchmark (work in progress).
# NOTE(review): this cell does not yet maintain any per-state-action averages;
# actions are still chosen by det_policy, so apart from the 'Ave_i0_' file
# prefix it runs the same loop as the deterministic-policy benchmark.
# TODO: replace the det_policy call with the averaging-based action choice.
fbase = 'Ave_i0_'
env = ProcessedWorm(0,ep_len=600)
eps_per_worm = 4

WORM_NUMBER = 0

# try/finally so the environment is closed even if an episode raises part-way.
try:
    # Episode indices run from WORM_NUMBER*eps_per_worm upward; the index is
    # used both in the output file name and to pick the target angle.
    for i_episode in np.arange(eps_per_worm)+WORM_NUMBER*eps_per_worm:
        done = False
        fname = fbase+str(i_episode)+'.json'
        trajectory = {}
        # First step of every episode uses action 0; later actions are
        # chosen from the observation returned by env.step.
        action = 0

        # Target cycles through 0, 90, 180, 270 as the episode index increases.
        obs = env.reset(target=(i_episode*90)%360)

        while not done:
            obs, r, done, info = env.step(action)
            # '\r' with end='' keeps the status on one line instead of scrolling.
            print(f'Body and head: {obs} \t\t\r',end='')

            # Combining trajectory data in info with previous steps
            add_to_traj(trajectory,info)

            # Chooses action for next step
            action = det_policy(obs)

        print("Episode finished after {} timesteps".format(info['t']))

        # json.dump writes str, so the file must be opened in text mode
        # ('wb' raises TypeError on the first write).
        # NOTE(review): assumes add_to_traj stores only JSON-serializable
        # values (e.g. lists rather than numpy arrays) - confirm.
        with open(fname,'w') as f:
            json.dump(trajectory,f)
finally:
    env.close()