In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import pickle
from matplotlib import style
import time
import random

# Select matplotlib's "ggplot" style sheet for any figures drawn later on.
style.use("ggplot")

In [2]:
class TrafficLights:
    """Four traffic lights placed around a single intersection.

    Light K is described by ``xK``/``yK`` (grid row/column) and ``cK`` (its
    colour, ``'red'`` or ``'green'``).  Lights 1 and 4 start red, lights 2
    and 3 start green.
    """

    def __init__(self, env):
        """Place the lights on ``env``'s grid.

        Args:
            env: object exposing ``n``, the number of grid columns, which is
                used to decode the flat cell indices 33, 36, 63 and 66.
        """
        # Row-major decoding: row = index // columns, column = index % columns.
        # Dividing by the row count (env.m) only gives the same answer on a
        # square grid.
        self.x1, self.y1 = divmod(33, env.n)
        self.c1 = 'red'
        self.x2, self.y2 = divmod(36, env.n)
        self.c2 = 'green'
        self.x3, self.y3 = divmod(63, env.n)
        self.c3 = 'green'
        self.x4, self.y4 = divmod(66, env.n)
        self.c4 = 'red'

    def __str__(self):
        """Return every light as ``(row, col): colour``, comma separated."""
        lights = (
            (self.x1, self.y1, self.c1),
            (self.x2, self.y2, self.c2),
            (self.x3, self.y3, self.c3),
            (self.x4, self.y4, self.c4),
        )
        return ", ".join(f"({x}, {y}): {c}" for x, y, c in lights)

    def __sub__(self, other):
        """Subtraction is not defined for a set of four lights.

        This object has no single ``x``/``y`` position, so the previous
        implementation could only ever fail with AttributeError.  Returning
        NotImplemented lets Python raise its standard TypeError instead.
        """
        return NotImplemented

    @staticmethod
    def _toggle(color):
        """Return 'red' for 'green' and 'green' for anything else."""
        return 'red' if color == 'green' else 'green'

    def action(self, choice):
        """Apply an agent action.

        Args:
            choice: ``'C'`` flips every light; any other value (for example
                ``'K'``) leaves the lights untouched.
        """
        if choice == 'C':
            self.change('C')

    def change(self, action=False):
        """Update the light colours.

        Args:
            action: when falsy, each pair of lights (1 & 4, 2 & 3) is given an
                independently random colour.  When ``'C'``, every light is
                flipped.  Any other truthy value is ignored.
        """
        if not action:
            # Draw for pair 1/4 first, then pair 2/3, so a seeded `random`
            # produces the same sequence as before.
            pair_14 = random.choice(['green', 'red'])
            pair_23 = random.choice(['green', 'red'])
            self.c1 = self.c4 = pair_14
            self.c2 = self.c3 = pair_23
        elif action == 'C':
            self.c1 = self._toggle(self.c1)
            self.c2 = self._toggle(self.c2)
            self.c3 = self._toggle(self.c3)
            self.c4 = self._toggle(self.c4)


In [3]:
class Car:
    """A car that drives in a straight line along one lane of the grid.

    ``flow`` selects the direction of travel:
    ``'H-R'`` column increases, ``'H-L'`` column decreases,
    ``'V-U'`` row decreases, ``'V-D'`` row increases.
    """

    # flow -> (row step, column step, stop row, stop column, light attribute).
    # The stop cell is where the car waits unless the named light is green;
    # these coordinates are fixed values for the notebook's 10x10 layout.
    _RULES = {
        'H-R': (0, 1, 5, 3, 'c3'),
        'H-L': (0, -1, 4, 6, 'c2'),
        'V-U': (-1, 0, 6, 5, 'c4'),
        'V-D': (1, 0, 3, 4, 'c1'),
    }

    def __init__(self, pos, flow, env):
        """Create a car.

        Args:
            pos: flat, row-major index of the starting cell.
            flow: one of ``'H-R'``, ``'H-L'``, ``'V-U'``, ``'V-D'``.
            env: the world the car lives in; must expose ``m`` (rows),
                ``n`` (columns) and, by the time ``move`` is called,
                ``TrafficLights`` with colour attributes ``c1``..``c4``.
        """
        # Keep a reference so move() does not depend on a global named `env`.
        self.env = env
        # Row-major decoding uses the column count for both row and column.
        self.x, self.y = divmod(pos, env.n)
        self.flow = flow

    def move(self):
        """Advance one cell along ``flow`` if allowed.

        The car stays put when it has reached the far edge of the grid, or
        when it is on its stop cell and the corresponding light is not green.
        An unrecognised ``flow`` is ignored.
        """
        rule = self._RULES.get(self.flow)
        if rule is None:
            return
        d_row, d_col, stop_row, stop_col, light = rule

        # Only the coordinate along the direction of travel is checked
        # against the grid edge.
        if d_col:
            at_edge = self.y == (self.env.n - 1 if d_col > 0 else 0)
        else:
            at_edge = self.x == (self.env.m - 1 if d_row > 0 else 0)
        if at_edge:
            return

        # The light is consulted only while the car is on its stop cell.
        if (self.x, self.y) == (stop_row, stop_col):
            if getattr(self.env.TrafficLights, light) != 'green':
                return

        self.x += d_row
        self.y += d_col

In [5]:
class GridWorld(object):
    """An image-backed grid holding buildings, four traffic lights and cars.

    ``grid`` is an ``(m, n, 3)`` uint8 array.  Cells listed in ``Buildings``
    are grey, each traffic light cell carries the light's current colour and
    each car cell is yellow.  The building list, the car start cells and the
    lane ranges read by ``getState`` are fixed values for a 10x10 layout.
    """

    def __init__(self, m, n):
        """Build an ``m`` x ``n`` world and paint buildings, lights and cars.

        Args:
            m: number of grid rows.
            n: number of grid columns.
        """
        self.grid = np.zeros((m, n, 3), dtype=np.uint8)
        self.m = m
        self.n = n
        # NOTE(review): the triples look like BGR order (what cv2.imshow
        # displays) rather than RGB - confirm before reusing elsewhere.
        self.colors = {'red': (0, 0, 255),
                       'blue': (255, 0, 0),
                       'green': (0, 255, 0),
                       'yellow': (0, 255, 255),
                       'grey': (105, 105, 105)
                      }
        self.actionSpace = {'K': 'same colors', 'C': 'change colors'}
        self.possibleActions = ['K', 'C']
        # Flat, row-major indices of the building cells.
        self.Buildings = [0, 1, 2, 3, 6, 7, 8, 9,
                          10, 11, 12, 13, 16, 17, 18, 19,
                          20, 21, 22, 23, 26, 27, 28, 29,
                          30, 31, 32, 33, 36, 37, 38, 39,
                          60, 61, 62, 63, 66, 67, 68, 69,
                          70, 71, 72, 73, 76, 77, 78, 79,
                          80, 81, 82, 83, 86, 87, 88, 89,
                          90, 91, 92, 93, 96, 97, 98, 99]
        # Same paint order as update_grid(): buildings, then lights (which
        # sit on building cells and overwrite them), then cars.
        self.addBuildings()
        self.addTrafficLights(self)
        self.addCars(self)

    def addBuildings(self):
        """Paint every cell listed in ``self.Buildings`` grey."""
        for building in self.Buildings:
            # Row-major decoding: divide by the column count, not the row
            # count (the two only coincide on a square grid).
            x, y = divmod(building, self.n)
            self.grid[x][y] = self.colors['grey']

    @staticmethod
    def _paint_lights(target):
        """Write each light's colour into ``target.grid`` at the light's cell."""
        lights = target.TrafficLights
        for x, y, color in ((lights.x1, lights.y1, lights.c1),
                            (lights.x2, lights.y2, lights.c2),
                            (lights.x3, lights.y3, lights.c3),
                            (lights.x4, lights.y4, lights.c4)):
            target.grid[x][y] = target.colors[color]

    def _paint_cars(self):
        """Paint the cell of every car in ``self.Cars`` yellow."""
        for car in self.Cars:
            self.grid[car.x][car.y] = self.colors['yellow']

    def addTrafficLights(self, env):
        """Create the lights for ``env`` and paint them onto ``env.grid``.

        Args:
            env: the world the lights belong to; ``__init__`` passes ``self``.
        """
        self.TrafficLights = TrafficLights(env)
        self._paint_lights(env)

    def addCars(self, env):
        """Create the four cars (one per flow direction) and paint them.

        Args:
            env: the world handed to each ``Car``; ``__init__`` passes ``self``.
        """
        starts = ((4, 'V-D'), (95, 'V-U'), (50, 'H-R'), (49, 'H-L'))
        self.Cars = [Car(pos, flow, env) for pos, flow in starts]
        self._paint_cars()

    def _occupancy(self, cells):
        """Return a list with 1 for each (row, col) in ``cells`` that is yellow, else 0."""
        yellow = self.colors['yellow']
        return [1 if np.array_equal(self.grid[r][c], yellow) else 0
                for r, c in cells]

    def getState(self):
        """Describe lane occupancy and which direction currently has green.

        Each lane is scanned over five cells and encoded as a string of five
        '0'/'1' digits (1 = a yellow, i.e. car, cell):
        lane 1 is row 5, columns 0-4; lane 2 is column 5, rows 5-9;
        lane 3 is row 4, columns 5-9; lane 4 is column 4, rows 0-4.
        The per-cell lists are also stored on ``self.lane1``..``self.lane4``.

        Returns:
            ``(lane1_state, lane2_state, lane3_state, lane4_state, flow)``
            where ``flow`` is ``'V'`` when light 1 is green and ``'H'``
            otherwise.
        """
        # Each lane now fills its own list; previously every loop appended to
        # lane1 and the strings were built from undefined local names.
        self.lane1 = self._occupancy((5, i) for i in range(5))
        self.lane2 = self._occupancy((j, 5) for j in range(5, 10))
        self.lane3 = self._occupancy((4, k) for k in range(5, 10))
        self.lane4 = self._occupancy((r, 4) for r in range(5))

        lane1_state = "".join(str(bit) for bit in self.lane1)
        lane2_state = "".join(str(bit) for bit in self.lane2)
        lane3_state = "".join(str(bit) for bit in self.lane3)
        lane4_state = "".join(str(bit) for bit in self.lane4)

        flow = 'V' if self.TrafficLights.c1 == 'green' else 'H'

        return lane1_state, lane2_state, lane3_state, lane4_state, flow

    def update_grid(self):
        """Repaint the grid from the current lights and cars.

        Returns:
            The freshly painted grid, which is also stored on ``self.grid``.
        """
        self.grid = np.zeros((self.m, self.n, 3), dtype=np.uint8)
        self.addBuildings()
        self._paint_lights(self)
        self._paint_cars()
        return self.grid

    def render(self):
        """Repaint the grid and show it, scaled to 300x300, in an OpenCV window."""
        self.update_grid()
        img = Image.fromarray(self.grid, 'RGB')
        # Nearest-neighbour keeps cell edges crisp when enlarging the grid.
        img = img.resize((300, 300), resample=Image.NEAREST)
        cv2.imshow("image", np.array(img))
        # imshow only draws once the HighGUI event loop runs; give it 1 ms.
        cv2.waitKey(1)
        

In [7]:
# Nothing to resume from: None means a fresh Q-table is built below.
start_q_table = None
if start_q_table is None:
    # One entry per state: four lane codes (every 4-bit pattern, as a
    # zero-padded binary string) plus the flow flag 'H' or 'V'.  Each entry
    # holds two Q-values drawn uniformly between -5 and 0.
    # NOTE(review): the lane codes here are 4 characters long, while
    # GridWorld.getState builds each lane string from five digits - confirm
    # the intended lane length before indexing this table with getState().
    q_table = {}
    lane_codes = ['{0:04b}'.format(code) for code in range(16)]
    for lane_a in lane_codes:
        for lane_b in lane_codes:
            for lane_c in lane_codes:
                for lane_d in lane_codes:
                    for direction in ['H', 'V']:
                        state = (lane_a, lane_b, lane_c, lane_d, direction)
                        q_table[state] = [np.random.uniform(-5, 0) for _ in range(2)]

# Disabled: resume from a pickled table instead of starting fresh.
# else:
#     with open(start_q_table, "rb") as f:
#         q_table = pickle.load(f) 

In [9]:
# Run the simulation: every episode starts from a fresh 10x10 world and
# steps the cars 200 times; every `show_every`-th episode is also displayed.
n_episodes = 25000
show_every = 1000
for episode in range(n_episodes):
    # NOTE: keep this bound to the module-level name `env`; other code in
    # this notebook may look that name up globally.
    env = GridWorld(10, 10)
    show = (episode % show_every == 0)
    if show:
        print(f"on #{episode}")
    for step in range(200):
        for car in env.Cars:
            car.move()
        if not show:
            continue
        # Repaint the grid and show it enlarged to 300x300.
        env.grid = env.update_grid()
        frame = Image.fromarray(env.grid, 'RGB').resize((300, 300), resample = Image.NEAREST)
        cv2.imshow("image", np.array(frame))
        pressed = cv2.waitKey(1) & 0xFF
        # 'q' abandons only the rest of this episode; the next one still runs.
        if pressed == ord('q'):
            break

on #0
on #1000
on #2000
on #3000
on #4000
on #5000
on #6000
on #7000
on #8000
on #9000
on #10000
on #11000
on #12000
on #13000
on #14000
on #15000
on #16000
on #17000
on #18000
on #19000
on #20000
on #21000
on #22000
on #23000
on #24000


---

[-1.0819861089047618, -4.08166017654567]