In [12]:
#library

# - numpy, pandas, matplotlib, imageio

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import imageio
from tpg.trainer import loadTrainer
import time


## First, we need an environment to interact with TPG agents.


#### environment - (sends the pixels to agent) -> agent - (selects an action from action set) -> environment -> ..... ####

The environment contains:

 - a set of images, each of which may contain multiple traffic signs.
 - a rectangular window cut out of the current image
     - TPG agents can only index pixels inside the window
     - TPG agents are able to move this rectangle
 - an action set from 1 to 4 that represents the legal actions of agents
     - 1 -> move up
     - 2 -> move left
     - 3 -> move down
     - 4 -> move right
 - support functions that allow the environment to react to the inputs from TPG agents

In [2]:
#### The environment should provide the following features.

# input - an action from a TPG agent; the agent selects one action from the environment's action set.

# the environment reacts to the input - it moves the rectangle up, down, left or right.

# it returns the pixels inside the rectangle to the agent

class TrackingEnv:
    '''
    Environment that lets a TPG agent steer a rectangular window over the
    images of a traffic-sign dataset.

    dataset       - table of ground-truth records; row ``index`` is fetched
                    with ``dataset.loc[index]`` and handed to ``Window``
    window_width  - width argument forwarded to ``Window``
    window_height - height argument forwarded to ``Window``
    step_move     - number of pixels the window moves per action
    tolerance     - tolerance value forwarded to ``Window``
    '''

    # (action code, name of the Window method that performs it)
    _MOVES = (
        (1, 'moveUp'),
        (2, 'moveLeft'),
        (3, 'moveDown'),
        (4, 'moveRight'),
    )

    def __init__(self,dataset,window_width,window_height,step_move,tolerance):
        print("Setting tracking environment")
        self.dataset = dataset
        self.window_width = window_width
        self.window_height = window_height
        self.tolerance = tolerance
        self.step_move = step_move

        # start on the first image
        self.reset()

    def __repr__(self):
        # image counter followed by the window's own description
        return f'curr_window #{self.index} :\n{self.curr_window}'

    def __str__(self):
        return repr(self)

    def _open_window(self):
        '''Build the window for the record at ``self.index`` and return its pixels.'''
        record = self.dataset.loc[self.index]
        self.curr_window = Window(
            record,
            self.window_width,
            self.window_height,
            self.step_move,
            self.tolerance,
        )
        return self.curr_window.returnCurrState()[0]

    def act(self,action):
        '''
        Apply one action (1 up, 2 left, 3 down, 4 right) to the current window
        and return ``(state, reward, isDone)``. Any other value moves nothing.
        '''
        for code, method_name in self._MOVES:
            if action == code:
                getattr(self.curr_window, method_name)()
                break
        return self.curr_window.returnCurrState()

    def reset(self):
        '''Go back to the first image and return the pixels of its window.'''
        self.index = 0
        return self._open_window()

    def next_image(self):
        '''Advance to the next image and return the pixels of its window.'''
        self.index += 1
        return self._open_window()

    def show_image(self):
        '''Plot the full image the current window belongs to.'''
        plt.imshow(self.curr_window.image)

In [3]:
class Window:
    '''
    A rectangular frame inside one image; the agent may only index the pixels
    inside this frame.

    The agent sends actions to the window via TrackingEnv. The window moves
    itself accordingly and returns the latest state, reward and done flag.

    record    - pandas Series for one ground-truth row: the first entry is the
                image path, the remaining entries are the labelled values
                (``x1``, ``y1``, ``x2``, ``y2`` are read by ``isTargetInsight``)
    width     - half of the window width: the window spans ``width`` pixels to
                each side of the image centre
    height    - half of the window height, same convention as ``width``
    step_move - number of pixels moved per action
    tolerance - stored on the instance but not used by any method of this class
                NOTE(review): confirm whether the hit test should honour it
    '''
    def __init__(self,record,width,height,step_move,tolerance):
        self.tolerance = tolerance
        # Positional access must go through .iloc: integer keys on a
        # label-indexed Series are deprecated as positions in recent pandas.
        self.image = imageio.imread(record.iloc[0])
        self.ground_true = record.iloc[1:]
        self.step_move = step_move

        # the window starts centred on the image
        self.max_height, self.max_width, _ = self.image.shape
        self.x1 = int(self.max_width/2 - width)
        self.x2 = int(self.max_width/2 + width)
        self.y1 = int(self.max_height/2 - height)
        self.y2 = int(self.max_height/2 + height)

    #status
    def __str__(self):
        return 'groud_true:\n'+str(self.ground_true)

    # actions
    # Each move is clamped to the distance left to the image border so the
    # window never leaves the image. Without the clamp a negative y1/x1 would
    # make the slice in returnCurrState wrap around or come back empty, and an
    # overshoot past the far edge would silently shrink the returned state.
    def moveUp(self):
        if self.y1 > 0:
            shift = min(self.step_move, self.y1)
            self.y1 -= shift
            self.y2 -= shift
    def moveDown(self):
        if self.y2 < self.max_height:
            shift = min(self.step_move, self.max_height - self.y2)
            self.y1 += shift
            self.y2 += shift
    def moveLeft(self):
        if self.x1 > 0:
            shift = min(self.step_move, self.x1)
            self.x1 -= shift
            self.x2 -= shift
    def moveRight(self):
        if self.x2 < self.max_width:
            shift = min(self.step_move, self.max_width - self.x2)
            self.x1 += shift
            self.x2 += shift

    # return curr state to agent
    def returnCurrState(self):
        '''Return ``(pixels inside the window, reward, isDone)``.'''
        reward,isDone = self.isTargetInsight()
        state = self.image[self.y1:self.y2,self.x1:self.x2]
        return (state,
               reward,
               isDone)

    # if the window fully contains the ground-truth box the reward is 1,
    # otherwise it is 0; the done flag is the same value as the reward.
    def isTargetInsight(self):
        gt = self.ground_true
        contains_target = (
            self.x1 <= gt.x1
            and self.x2 >= gt.x2
            and self.y1 <= gt.y1
            and self.y2 >= gt.y2
        )
        reward = 1 if contains_target else 0
        isDone = reward

        return (reward,isDone)

## Read images from the dataset and clean the data
#### currently only for single traffic sign tracking

In [4]:
def _parse_gt_record(items, base_path):
    """Convert one split ground-truth row to ``[path, x1, y1, x2, y2, type]``.

    The file name gets ``base_path`` prepended and the four box coordinates
    are converted to ``int``; the trailing type field is left as a string.
    """
    record = list(items)
    record[0] = base_path + record[0]
    for i in range(1, 5):
        record[i] = int(record[i])
    return record


def multiple_traffic_sign_read(f, base_path=None):
    """Read every annotation row from an iterable of ``;``-separated lines.

    f         - open ground-truth file (or any iterable of lines)
    base_path - prefix added to each file name; when omitted, the
                notebook-level ``export_path`` variable is used

    Returns a list of ``[path, x1, y1, x2, y2, type]`` records. Blank lines
    are skipped.
    """
    if base_path is None:
        base_path = export_path
    data = []
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing empty lines
        data.append(_parse_gt_record(line.split(";"), base_path))
    return data


def single_traffic_sign_read(f, base_path=None):
    """Read only the images that carry exactly one annotation row.

    Rows are grouped by file name over *consecutive* lines; a group is kept
    only when it consists of a single row. Parameters and return value are
    the same as for ``multiple_traffic_sign_read``.
    """
    if base_path is None:
        base_path = export_path
    data = []
    run = []  # consecutive rows that share the current file name
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing empty lines
        items = line.split(";")
        if run and run[0][0] != items[0]:
            # file name changed: close the previous run
            if len(run) == 1:
                data.append(_parse_gt_record(run[0], base_path))
            run = []
        run.append(items)
    # the last run is not followed by a file-name change, so close it here
    if len(run) == 1:
        data.append(_parse_gt_record(run[0], base_path))
    return data


# read ground truth data ! only for single traffic sign tracking
def read_gt(fname,export_path):
    """Load the ground-truth file ``export_path + fname`` into a DataFrame.

    The image paths in the ``fname`` column are prefixed with the
    ``export_path`` argument, so the frame is consistent with the directory
    the ground-truth file was actually read from.

    Returns a DataFrame with columns ``fname, x1, y1, x2, y2, type``.
    """
    with open(export_path+fname) as f:
        # choose one of the two readers:
        # data = multiple_traffic_sign_read(f, export_path)
        data = single_traffic_sign_read(f, export_path)

    df = pd.DataFrame(data,columns=["fname","x1","y1","x2","y2","type"])
    return df

In [5]:
# Directory that holds the images and the ground-truth file (trailing slash
# required: paths are built by plain string concatenation).
export_path = "../res/TrainIJCNN2013/"
# Name of the ground-truth file inside that directory.
fpath = "gt.txt"
# Ground-truth records as a DataFrame.
dataset = read_gt(fname=fpath, export_path=export_path)

## import TPG agent and trainer

In [7]:
# import to do training
from tpg.trainer import Trainer
# import to run an agent (always needed)
from tpg.agent import Agent
#
from IPython import display

In [8]:
# To transform pixel matrix to a single vector.
def getState(inState):
    """Flatten an RGB pixel block into one packed integer per pixel.

    inState - array-like of shape (rows, cols, 3)

    Returns a 1-D array of length rows*cols whose entries are
    ``(R << 16) + (G << 8) + B``.

    Integer inputs narrower than 32 bits (e.g. uint8 image data) are upcast
    to int32 first; otherwise the shifts would overflow the element type and
    silently produce wrong values. Wider inputs keep their dtype.
    """
    pixels = np.asarray(inState)
    if pixels.dtype.kind in "bui" and pixels.dtype.itemsize < 4:
        pixels = pixels.astype(np.int32)

    # each row is all 1 color
    rgbRows = np.reshape(pixels, (len(pixels[0]) * len(pixels), 3)).T

    # add each with appropriate shifting
    # get RRRRRRRR GGGGGGGG BBBBBBBB
    return np.add(np.left_shift(rgbRows[0], 16),
        np.add(np.left_shift(rgbRows[1], 8), rgbRows[2]))

In [9]:
def display_state(env):
    """Print the distance between the window centre and the ground-truth box centre."""
    window = env.curr_window
    target = window.ground_true

    # centre of the labelled box
    target_cx = (target.x1 + target.x2)/2
    target_cy = (target.y1 + target.y2)/2

    # centre of the agent's window
    window_cx = (window.x1 + window.x2)/2
    window_cy = (window.y1 + window.y2)/2

    dx = target_cx - window_cx
    dy = target_cy - window_cy
    print("Distance:",(dx**2+dy**2)**0.5)

## single process training

In [14]:

# set the trainer
%matplotlib inline
ftl = 300 # frame to live
level = 100 # how many image used to train
generation = 10
time_format = '{:.2f}'

#setting enironment
env = TrackingEnv(
                 dataset=        dataset
                ,window_width=   50
                ,window_height=  50
                ,step_move=      5
                ,tolerance=      5
               )
#timer 
aStart = time.time()

#setting trainer
trainer = Trainer(actions=range(1,5), teamPopSize=40, rTeamPopSize=20) 


curScores = [] # hold scores in a generation
summaryScores = pd.DataFrame([],columns=['min','max','average']) # record score summaries for each gen (min, max, avg)


for gen in range(generation): # generation loop
    print('starting generation #'+str(gen)+'....')
    
    tStart = time.time()
    curScores = [] # new list per gen
    
    agents = trainer.getAgents()
    
    while True: # loop to go through agents
        teamNum = len(agents)
        agent = agents.pop()
        if agent is None:
            break # no more agents, so proceed to next gen
        
        state = env.reset() # get initial state and prep environment
        score = 0
        
        
        for l in range(level):
            for i in range(ftl): # run episodes 
                # get action from agent
                # must transform to at-least int-32 (for my getState to bitshift correctly)
                act = agent.act(getState(np.array(state, dtype=np.int32))) 

                # feedback from env
                state, reward, isDone = env.act(act)
                score += reward # accumulate reward in score
                if isDone:
                    break # end early if losing state
            #move to next image
            env.next_image()

        agent.reward(score) # must reward agent (if didn't already score)
        
        curScores.append(score) # store score
        
        if len(agents) == 0:
            break
    print('min:',min(curScores),'max:', max(curScores),'average:',sum(curScores)/len(curScores))
    print('Finish generation #'+str(gen)+'...in '+time_format.format(time.time()-tStart)+" sec")
    
    # at end of generation, make summary of scores
    summaryScores.loc['Gen#'+str(gen)] = [min(curScores), max(curScores),sum(curScores)/len(curScores)] # min, max, avg
    
    trainer.saveToFile('Trainer')
    summaryScores.to_csv(path_or_buf='summaryScores.csv')
    
    trainer.evolve()

print("training complete...in "+time_format.format(time.time()-aStart)+" sec")

Setting tracking environment
starting generation #0....
min: 3 max: 26 average: 8.9
Finish generation #0...in 139.32 sec
starting generation #1....
min: 3 max: 27 average: 11.421052631578947
Finish generation #1...in 133.20 sec
starting generation #2....
min: 3 max: 26 average: 15.13888888888889
Finish generation #2...in 121.67 sec
starting generation #3....
min: 5 max: 26 average: 22.21212121212121
Finish generation #3...in 105.66 sec
starting generation #4....


KeyboardInterrupt: 

## Multiprocess training

In [10]:
def runAgent(args):
    """Evaluate one agent on ``level`` images; meant to be used with ``Pool.map``.

    args is a sequence ``(agent, env_name, env_queue, scoreList, level, ftl)``:

    agent     - agent to evaluate
    env_name  - task name under which the score is recorded
    env_queue - shared queue of environments; one is borrowed for the run and
                always put back, even if the evaluation raises
    scoreList - shared list that receives ``(team id, team outcomes)``
    level     - number of images to evaluate on
    ftl       - maximum number of moves per image

    Returns None; results are delivered through ``scoreList``.
    """
    agent, env_name, env_queue, scoreList, level, ftl = args[:6]

    #get env from shared queue
    env = env_queue.get()
    try:
        # skip if task already done by agent
        if agent.taskDone(env_name):
            scoreList.append((agent.team.id, agent.team.outcomes))
            return

        state = env.reset()

        scoreTotal = 0 # score accumulates over all episodes
        for l in range(level): # episode loop
            for i in range(ftl): # frame loop

                #agent act
                act = agent.act(getState(np.array(state, dtype=np.int32)))

                # feedback from env
                state, reward, isDone = env.act(act)
                scoreTotal += reward # accumulate reward in score
                if isDone:
                    break # end early: the window now contains the traffic sign
            # Move to the next image and use ITS pixels as the new state, so
            # the first action on that image is not based on the previous
            # image. Nothing is loaded after the final episode.
            if l < level - 1:
                state = env.next_image()

        agent.reward(scoreTotal,env_name)
        scoreList.append((agent.team.id, agent.team.outcomes))
    finally:
        # hand the environment back so other workers are never starved
        env_queue.put(env)

In [20]:
# Multi-process training run: the agents of each generation are evaluated in
# parallel by a pool of worker processes that call runAgent.
import multiprocessing as mp

# Controls whether a saved trainer is picked up (see the `if` further down).
# NOTE(review): with the condition used below, the saved './Trainer' file is
# loaded only when `reload` is False, and a fresh Trainer is created when it is
# True - the flag name reads as the opposite; confirm the intended meaning.
reload = True
# set the params
%matplotlib inline
ftl = 200 # frame to live: maximum number of moves per image
level = 100 # how many images are used to evaluate each agent
generation = 50 # number of generations to evolve
num_processes = 16 # number of worker processes, and of environments created below
time_format = '{:.2f}'


# timer for the whole run
aStart = time.time()

# manager that provides the shared queue and the shared per-generation lists
man = mp.Manager()
# setting environment: one TrackingEnv per worker process is placed in a shared
# queue; runAgent takes one from the queue and puts it back when finished
env_name = 'Track_Traffic_sign'
env_queue = man.Queue()
for i in range(num_processes):
    env_queue.put(TrackingEnv(
                     dataset=        dataset
                    ,window_width=   50
                    ,window_height=  50
                    ,step_move=      5
                    ,tolerance=      5))
# setting trainer: load the saved one or start from scratch (see NOTE on `reload`)
if os.path.exists('./Trainer') and not reload:
    trainer = loadTrainer('Trainer')
else:
    trainer = Trainer(actions=range(1,5), teamPopSize=20, rTeamPopSize=20) 



summaryScores = pd.DataFrame([],columns=['min','max','average']) # record score summaries for each gen (min, max, avg)


for gen in range(generation): # generation loop
    print('starting generation #'+str(gen)+'....')
    # shared list the workers append their (team id, outcomes) results to
    scoreList = man.list()
    
    tStart = time.time()
    
    # get the agents to evaluate in this generation
    agents = trainer.getAgents() # swap out agents only at start of generation
    
    # create a fresh pool for this generation
    pool = mp.Pool(processes=num_processes)
    # run the agents: one runAgent call per agent; map blocks until all are done
    pool.map(runAgent, 
        [(agent,env_name, env_queue, scoreList, level, ftl)
        for agent in agents])
    # shut the pool down and wait for the worker processes to exit
    pool.close()
    pool.join()
    
    # apply scores collected by the workers back to the trainer
    # (presumably needed because each worker operates on its own copy of the
    # agent - confirm against the tpg library)
    teams = trainer.applyScores(scoreList)
    # generate new population
    trainer.evolve(tasks=[env_name])
    
    # fitness statistics are read after evolve()
    # NOTE(review): presumably evolve() is what populates fitnessStats - confirm
    curScores = trainer.fitnessStats
    print('min:',curScores['min'],'max:', curScores['max'],'average:',curScores['average'])
    print('Finish generation #'+str(gen)+'...in '+time_format.format(time.time()-tStart)+" sec")
    
    
    # at end of generation, make summary of scores
    summaryScores.loc['Gen#'+str(gen)] = [curScores['min'],curScores['max'],curScores['average']] # min, max, avg
    
    # checkpoint the (already evolved) trainer and the score table every generation
    trainer.saveToFile('Trainer')
    summaryScores.to_csv(path_or_buf='summaryScores.csv')
    
print("training complete...in "+time_format.format(time.time()-aStart)+" sec")

Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
Setting tracking environment
starting generation #0....
min: 3 max: 26 average: 9.95
Finish generation #0...in 35.59 sec
starting generation #1....
min: 3 max: 26 average: 14.45
Finish generation #1...in 28.31 sec
starting generation #2....
min: 6 max: 26 average: 20.4
Finish generation #2...in 14.58 sec
starting generation #3....
min: 3 max: 26 average: 20.904761904761905
Finish generation #3...in 24.90 sec
starting generation #4....
min: 3 max: 26 average: 22.952380952380953
Finish generation #4...in 18.66 sec
starting generation #5....
min: 3 max: 26 avera