In [5]:
import random
import json
import numpy as np
from itertools import permutations
import matplotlib.pyplot as plt
from tkinter import *
import time as tm
import os
import math
import torch
import errno
from config import ENV_CONSTANTS, LIGHT_CONSTANTS, CAR_CONSTS, FILES, STATE_CONSTANTS

In [6]:
class TrafficLight:
    '''
        Defines the representation of a single traffic light in our environment.

        Attributes:
            directionIsNorthSouth: True while the north/south queues are the ones
                released by updateQueues; picked at random on construction.
            timeChanged: time passed to the most recent changeLight call (0 initially).
            id: identifier supplied by the caller.
            queues: one LightQueue per entry of LIGHT_CONSTANTS["ACTION_DIR"].
            neighbours: adjacent lights, None where there is no neighbour.
    '''

    def __init__(self, id):
        self.directionIsNorthSouth = bool(random.getrandbits(1))
        self.timeChanged = 0
        self.id = id
        self.queues = self.__init_queues()
        # LIGHT NOTATION: 0 = northern neighbour, 1 = east neighbour, 2 = south neighbour, 3 = west neighbour
        self.neighbours = [None, None, None, None]

    def __init_queues(self):
        ''' Build one LightQueue per configured direction, using the ACTION_DIR value as its id '''
        return [LightQueue(queueId) for queueId in LIGHT_CONSTANTS["ACTION_DIR"].values()]

    def changeLight(self, time):
        ''' Toggle light direction and set time '''
        self.directionIsNorthSouth = not self.directionIsNorthSouth
        self.timeChanged = time

    def getNumCars(self):
        ''' Total number of cars over all queues of this light '''
        return sum([queue.getNumCars() for queue in self.queues])

    def getNumCarsWaiting(self):
        ''' Total number of cars whose delay has run out, over all queues of this light '''
        return sum([queue.getNumCarsWaiting() for queue in self.queues])

    def getWaitTimes(self, time, totalWaitTime):
        '''
        Bin the summed wait time of the north/south and of the east/west queues.

        :time: current time, forwarded to each queue's getWaitTimes
        :totalWaitTime: value handed to the "large" and "medium" lowerBound callables
        Returns a tuple (NS penalty, EW penalty); the direction that is currently
        green always reports the "zero" penalty.
        '''
        bins = LIGHT_CONSTANTS["TIME_BINS"]

        def binPenalty(wait_time):
            # Bin the total wait time
            # Update size of Q table if number of bins changes
            if wait_time > bins["large"]["lowerBound"](totalWaitTime):
                return bins["large"]["penalty"]
            elif wait_time > bins["medium"]["lowerBound"](totalWaitTime):
                return bins["medium"]["penalty"]
            elif wait_time > bins["small"]["lowerBound"]:
                return bins["small"]["penalty"]
            else:
                return bins["zero"]["penalty"]

        n,s,e,w = LIGHT_CONSTANTS["ACTION_DIR"]["n"],LIGHT_CONSTANTS["ACTION_DIR"]["s"],LIGHT_CONSTANTS["ACTION_DIR"]["e"],LIGHT_CONSTANTS["ACTION_DIR"]["w"]
        NS = binPenalty(self.queues[n].getWaitTimes(time) + self.queues[s].getWaitTimes(time))
        EW = binPenalty(self.queues[e].getWaitTimes(time) + self.queues[w].getWaitTimes(time))
        if self.directionIsNorthSouth: # only give the cost if that light is red. If it's green the car will get through eventually.
            return LIGHT_CONSTANTS["TIME_BINS"]["zero"]["penalty"], EW
        return NS,LIGHT_CONSTANTS["TIME_BINS"]["zero"]["penalty"]

    def pushCarToNextLight(self, car, action, time):
        """
            Takes in car to be pushed to an adjacent traffic lights queue
            action == "n" => push car to north neighbour's north facing queue
            action == "s" => push car to south neighbour's south facing queue
            ... etc.
        """
        direction = LIGHT_CONSTANTS["ACTION_DIR"][action]
        assert self.neighbours[direction], "neighbor does not exist"
        queue = self.neighbours[direction].queues[direction]
        initLength = queue.getNumCars()
        queue.pushCar(car, time)
        assert(queue.getNumCars() - initLength == 1)

    def updateQueues(self, time):
        ''' 
        Move cars in direction the light is set 
        Returns the list of times each removed car was in the environment
        '''
        if self.directionIsNorthSouth:
            greenQueues = [self.queues[0], self.queues[2]]
        else:
            greenQueues = [self.queues[1], self.queues[3]]

        # Every car, whatever its queue, gets exactly one tick closer to the stop line.
        for queue in self.queues:
            for car in queue.cars:
                car.delay = max(0, car.delay - 1)

        travelTimes = []
        for queue in greenQueues:
            if not queue.getNumCars():
                continue
            frontCar = queue.peakCar()
            assert(frontCar.route)
            nextCarAction = frontCar.route[0]
            direction = LIGHT_CONSTANTS["ACTION_DIR"][nextCarAction]
            if frontCar.delay > 0:
                # Still approaching the light. Its delay was already reduced in the
                # loop above, so it must not be reduced a second time here.
                continue
            if self.neighbours[direction] is None:
                # If a car is at the north most intersection and wants to continue north,
                # it exits the city
                queue.popCar()
                travelTimes.append(time - frontCar.enteredEnvironment)
            else:
                car = queue.popCar()
                car.route.pop(0)
                car.delay = car.MAX_DELAY  # the car now has to travel to the next light
                self.pushCarToNextLight(car, nextCarAction, time)
        return travelTimes

    def addNeighbour(self, direction, light):
        """
            Takes in a direction (n,e,s,w) and a traffic light and adds it to be an adjacent 
            traffic light.
        """
        lightToChange = LIGHT_CONSTANTS["ACTION_DIR"][direction]
        self.neighbours[lightToChange] = light

    def __str__(self):
        return "< {}. Total Cars: {}>".format(self.id, self.getNumCars())
    

class Car:
    '''
    A single vehicle moving through the environment.

    :route: list describing the car's path; its first element is stored as the
        car's position and the list object itself is kept (not copied)
    :startTime: (int) the time the car got to a light, None if not at a light.
        The same value is recorded as the time the car entered the environment.
    '''

    def __init__(self, route, startTime=None):
        self.position = route[0]
        # A new car starts with the full configured delay
        self.MAX_DELAY = self.delay = CAR_CONSTS["MAX_DELAY"]
        self.route = route
        self.startTime = self.enteredEnvironment = startTime

class LightQueue:
    '''
    FIFO of cars lined up at one direction of a light.
    The front of the queue is index 0 of `cars`.
    '''

    def __init__(self, id, cars=[], time=0):
        self.id = id
        # Work on a copy so the caller's sequence (and the shared default) is never mutated
        self.cars = cars[:]
        for queued in self.cars:
            queued.startTime = time

    def __str__(self):
        return "< Light Queue {}> ".format(self.id)

    def pushCar(self, car, time):
        ''' 
        Stamp the car with the given time and append it to the back of the queue
        '''
        sizeBefore = len(self.cars)
        car.startTime = time
        self.cars.append(car)
        assert(len(self.cars) - sizeBefore == 1)

    def peakCar(self):
        ''' Return the car at the front of the queue without removing it '''
        return self.cars[0]

    def popCar(self):
        ''' 
        Remove the car at the front of the queue and hand it back
        '''
        sizeBefore = len(self.cars)
        frontCar = self.cars.pop(0)
        assert(sizeBefore - len(self.cars) == 1)
        return frontCar

    def getNumCars(self):
        return len(self.cars)

    def getNumCarsDriving(self):
        ''' Number of cars that still have delay left '''
        return sum(1 for car in self.cars if car.delay)

    def carsWaiting(self):
        ''' List of the cars whose delay has run out, in queue order '''
        return list(filter(lambda car: not car.delay, self.cars))

    def getNumCarsWaiting(self):
        return len(self.carsWaiting())

    def getWaitTimes(self, time):
        '''
        Sum of (time - startTime) over the cars that are waiting.
        '''
        # A car's delay says how far it still is from beginning its "wait" in
        # the queue: while delay > 0 the car is not stationary, so it does not
        # count towards the agent's cost.
        waitTimes = []
        for car in self.carsWaiting():
            waitTimes.append(time - car.startTime)
        total = sum(waitTimes)
        assert(total >= 0),"time: {}, sum: {}, car starts: {}, delays: {}".format(time,waitTimes,[car.startTime for car in self.carsWaiting()],[car.delay for car in self.carsWaiting()])
        return total

In [7]:
# Q-learning Agent and the four-light Environment used by the simulation.
# Agent selects light configurations and updates the Q-table; Environment holds the lights, spawns cars and builds routes.

class Agent:
    '''
    Tabular Q-learning agent that picks the direction of every light.

    The Q-table is a dict keyed by str(state); each value is a numpy array with
    one entry per action. An action index selects an entry of `actionMap`, which
    gives the desired direction (1 or 0) of each of the four lights.
    '''

    def __init__(self, environment, discount=0.5, epsilon=0.01, lr=0.9 ,lights=4, discreteCosts=3, numActions=16, numDayTime=5, continueTraining=False):
        ''' 
        init as equiprobable
        the policy for each light is represented as an index of the policy array
        p[a|s]

        State representation:
            [
                L1Direction,...,L4Direction,
                f(L1CulmTime(N/S)),f(L1CulmTime(E/W)),...,f(L4CulmTime(N/S)),f(L4CulmTime(E/W)),
                timeOfDay
            ]

        :continueTraining: when True the Q-table is read from FILES["LOAD_FILE"]
        '''
        self.discount = discount
        self.epsilon = epsilon
        self.lr = lr
        self.environment = environment
        self.numStates = (2**lights)*(discreteCosts**(2*lights)) # *numDayTime Number of possible lights * traffic wait times * times of day
        self.numActions = numActions
        self.qTable = {}
        if continueTraining:
            with open(FILES["LOAD_FILE"]) as qTableFile:
                loaded = json.load(qTableFile)
            # JSON can only store the rows as plain lists. Restore them to arrays so
            # the vectorised arithmetic in softmax / eGreedy works on loaded states.
            self.qTable = {state: np.asarray(row, dtype=float) for state, row in loaded.items()}
            print("Loaded qTable from {}".format(FILES["LOAD_FILE"]))
        self.lightChangeCost = -1
        self.actionMap = self.generateActionMap()
        print("Initial qTable length: {}".format(len(self.qTable)))

    def generateActionMap(self):
        '''
        Enumerate every assignment of 0/1 to the four lights (16 in total),
        grouped by the number of lights set to 1.
        '''
        lst = [0,0,0,0]
        actionMap = []
        for i in range(4):
            # NOTE: the order inside each group is the iteration order of the set.
            # It is kept as is because Q-tables saved earlier index actions by it.
            for action in set(permutations(lst)):
                actionMap.append(action)
            lst[i] = 1
        actionMap.append(tuple(lst))  # all lights set to 1; a tuple like every other entry
        return actionMap

    def qVal(self,state):
        """
            Returns the action values of a particular state for the Q-table,
            creating an all-zero row the first time a state is seen.
            Note the state variable is of the form:
            state = [
                L1Direction,...,L4Direction,
                f(L1CulmTime(N/S)),f(L1CulmTime(E/W)),...,f(L4CulmTime(N/S)),f(L4CulmTime(E/W)),
                timeOfDay
            ]
        """
        key = str(state)
        row = self.qTable.get(key)
        if row is None:
            row = np.zeros(self.numActions)
            self.qTable[key] = row
        elif not isinstance(row, np.ndarray):
            # Row was stored as a plain sequence (e.g. a table assigned from raw JSON)
            row = np.asarray(row, dtype=float)
            self.qTable[key] = row
        return row

    def softmax(self,state):
        """Sample an action index from the softmax distribution over the state's action values."""
        actions = self.qVal(state)
        e_x = np.exp(actions - np.max(actions))  # shift by the max for numerical stability
        policy = e_x / e_x.sum(axis=0)
        return np.random.choice(np.arange(len(policy)), p=policy)   

    def eGreedy(self, state):
        ''' 
            Return the e-greedy action for a given state
        '''
        actions = self.qVal(state)
        if np.all(actions == actions[0]):
            # If all the elements are equal, random walk
            policy = np.ones(self.numActions) / self.numActions
        else:
            policy = np.ones(self.numActions) * (self.epsilon / self.numActions)
            bestAction = np.argmax(actions)
            policy[bestAction] += 1.0 - self.epsilon

        return np.random.choice(np.arange(len(policy)), p=policy)    

    def greedyAction(self,state):
        """
            state is defined in Environment.toState()

            Returns (column in the QTable with the highest value, that value)
        """
        actions = self.qVal(state)
        return np.argmax(actions), np.max(actions)

    def updateLights(self,time,greedy=False):
        """ 
            Update lights based on policy
            returns the "column" in the qtable that we updated

            :greedy: when True the configured exploration policy is bypassed and
                the highest-valued action is taken
            Raises ValueError when greedy is False and STATE_CONSTANTS["POLICY"]
            is neither "softmax" nor "egreedy".
        """
        state = self.environment.toState(time)
        if greedy:
            actionIndex, _ = self.greedyAction(state)
        elif STATE_CONSTANTS["POLICY"] == "softmax": 
            actionIndex = self.softmax(state)
        elif STATE_CONSTANTS["POLICY"] == "egreedy":
            actionIndex = self.eGreedy(state)
        else:
            raise ValueError("Unknown policy {!r}; expected 'softmax' or 'egreedy'".format(STATE_CONSTANTS["POLICY"]))

        action = self.actionMap[actionIndex]
        # The first four state entries are the current light directions
        for i, (newLightDir, oldLightDir) in enumerate(zip(action, state[:4])):
            if not newLightDir == oldLightDir:
                self.environment.lights[i].changeLight(time) 
        return actionIndex

    def updateQTable(self,previousState,newState,action,waitTime):
        '''
        Q-learning update of the entry (previousState, action).

        The reward is -waitTime plus lightChangeCost for every light whose
        direction differs between previousState and the chosen action.
        Returns True when newState had no row in the Q-table before this call.
        '''
        stateIsNew = str(newState) not in self.qTable
        newLights = self.actionMap[action]
        oldLights = previousState[:4]
        reward = waitTime*-1
        for oldDirection,newDirection in zip(oldLights,newLights):
            if oldDirection != newDirection:
                reward+=self.lightChangeCost
        _ , greedyNext = self.greedyAction(newState)
        oldVal = self.qVal(previousState)[action]
        update = oldVal + self.lr * (reward + self.discount * greedyNext - oldVal)
        self.__updateQTable(previousState,action,update)
        return stateIsNew

    def __updateQTable(self,state,action,value):
        ''' Store value for (state, action), creating the state's row if needed '''
        state = str(state)
        if state not in self.qTable:
            self.qTable[state] = np.zeros(self.numActions)
        self.qTable[state][action] = value
# Environment: the four-light grid that the Agent observes and controls.

class Environment:
    '''
        Represents our environment with 4 traffic lights
    '''

    def __init__(self, time):
        # [0] = north-west
        # [1] = north-east
        # [2] = south-east
        # [3] = south-west
        self.currentTime = time
        self.lights = self.__init_lights()  # built once; each light picks a random direction
        self.MAX_CARS = ENV_CONSTANTS["MAX_CARS"]
        self.isRushHour = self.__highTraffic(time)

    def __init_lights(self):
        ''' Create the four lights and wire each one to its two neighbours '''
        lights = [None,None,None,None]
        for key in ENV_CONSTANTS["LIGHT_POSITIONS"]:
            # NW light has a key at 0, this is the index in the lights array that it will reside
            position = ENV_CONSTANTS["LIGHT_POSITIONS"][key]
            lights[position] = TrafficLight(key)   
        NW = ENV_CONSTANTS["LIGHT_POSITIONS"]["NW"]
        NE = ENV_CONSTANTS["LIGHT_POSITIONS"]["NE"]
        SE = ENV_CONSTANTS["LIGHT_POSITIONS"]["SE"]
        SW = ENV_CONSTANTS["LIGHT_POSITIONS"]["SW"]
        lights[NW].addNeighbour('e', lights[NE])
        lights[NW].addNeighbour('s', lights[SW])

        lights[NE].addNeighbour('w', lights[NW])
        lights[NE].addNeighbour('s', lights[SE])

        lights[SE].addNeighbour('n', lights[NE])
        lights[SE].addNeighbour('w', lights[SW])

        lights[SW].addNeighbour('n', lights[NW])
        lights[SW].addNeighbour('e', lights[SE])
        return lights

    def addCarToQueue(self, car, time):
        '''
        Place a new car in its starting queue.
        The first route element, a (light index, queue index) pair, is consumed.
        '''
        lightIdx, queueIdx = car.route.pop(0)[:2]
        self.lights[lightIdx].queues[queueIdx].pushCar(car, time)

    def __highTraffic(self,time):
        ''' True when time lies inside one of the configured rush-hour intervals (bounds inclusive) '''
        return any(t[0] <= time <= t[1] for t in ENV_CONSTANTS["RUSH_HOUR_TIMES"])

    # TODO: create path through update for all_routes
    def addAllCars(self, time, allRoutes):
        """
            Probabilistically determines how many cars should be added at a given
            time step (5-10 during rush hour, 0-4 otherwise), capped so that the
            environment never holds more than MAX_CARS cars.
        """
        if self.__highTraffic(time):
            numCarsToAdd = random.randint(5, 10)
        else:
            numCarsToAdd = random.randint(0, 4)

        numCarsToAdd = min(self.MAX_CARS - self.getNumCars(), numCarsToAdd)

        for _ in range(numCarsToAdd):
            route = random.choice(allRoutes)[:]  # copy: the car consumes its route
            newCar = Car(route, startTime=time)
            self.addCarToQueue(newCar, time)

    def update(self, time, allRoutes):
        '''
        Advance the environment by one tick: spawn cars, then let every light move its queues.
        Returns (cost per car currently in the system, travel times of the cars that left).
        '''
        self.addAllCars(time, allRoutes)
        travelTimes = []
        for light in self.lights:
            travelTimes += light.updateQueues(time) # concat
        self.currentTime = time
        self.isRushHour = self.__highTraffic(time)
        numCars = self.getNumCars()
        if numCars:
            return self.getCost(time)/numCars, travelTimes
        return 0, travelTimes

    def getNumCars(self):
        """
            Returns the total number of cars in the system.
        """
        return sum([light.getNumCars() for light in self.lights])

    def getCarWaits(self, time):
        """
            Returns a list with, for every car in the system, the time elapsed
            since its startTime.
        """
        return [time - car.startTime
                for light in self.lights
                for queue in light.queues
                for car in queue.cars]

    def getCarTravelDuration(self, time):
        """
            Returns a list with, for every car still in the system, the time
            elapsed since it entered the environment.
        """
        return [time - car.enteredEnvironment
                for light in self.lights
                for queue in light.queues
                for car in queue.cars]

    def toState(self, time):
        """
            returns the state based on the environment
            [
                L1Direction,...,L4Direction,
                f(L1CulmTime(N/S)),f(L1CulmTime(E/W)),...,f(L4CulmTime(N/S)),f(L4CulmTime(E/W)),
                timeOfDay
            ]
        """
        state = [1 if light.directionIsNorthSouth else 0 for light in self.lights]
        totalWait = sum(self.getCarWaits(time))  # identical for every light, so computed once
        for light in self.lights:
            NSTotalTime, EWTotalTime = light.getWaitTimes(time, totalWait)
            state += [NSTotalTime, EWTotalTime]
        return state


    def getCost(self, time):
        ''' Sum of the binned wait penalties of all lights (state entries after the four directions) '''
        return sum(self.toState(time)[4:])

    def __loopRoute(self, laps):
        ''' Single route starting at (0, 2) that repeats s, e, n, w `laps` times and ends with w '''
        return [[(0,2)] + ["s","e","n","w"] * laps + ["w"]]

    def __simpleLoopy(self):
        return self.__loopRoute(ENV_CONSTANTS["EPISODE_LENGTH"]//4)

    def __loopy(self):
        return self.__loopRoute(10)

    def generateRoutes(self):
        """
            Returns a list of all possible routes a car can take
            Each route is a list where the first element is a tuple (start light, queue direction) and
            subsequent elements are optimal actions for the car
        """
        route  = ENV_CONSTANTS["ROUTE"]
        if route == "loopy":
            # NOTE(review): the message says 30 loops but __loopy builds 10 - confirm which is intended
            print("Loopy: Each car does a loop 30 times and exits")
            return self.__loopy()
        elif route == "simpleLoopy":
            print("simpleLoopy: Each car loops forever")
            return self.__simpleLoopy()

        #====================== Helper functions here ======================
        def BFS(g, startPoint, endPoint):
            """
                Breadth first search for directed graph with no weights.
                Returns the list of nodes from startPoint to endPoint, or [] if
                there is none (or if both are the same node).
            """
            if startPoint == endPoint:
                return []

            explored = set()
            queue = [[startPoint]]
            while queue:
                path = queue.pop(0)
                node = path[-1]
                if node in explored:
                    continue
                # Expand to neighbours and check if we have a complete path
                for neighbour in g[node]:
                    newPath = list(path) + [neighbour]
                    queue.append(newPath)
                    if neighbour == endPoint:
                        return newPath
                explored.add(node)

            return []

        def getExitAction(exitPoint):
            """
                Returns final action a car should take to exit at the correct location
            """
            if exitPoint == 1 or exitPoint == 2:
                return "n"
            elif exitPoint == 3 or exitPoint == 4:
                return "e"
            elif exitPoint == 5 or exitPoint == 6:
                return "s"
            else:
                return "w"

        # Construct graph with the value of each vertex being a list of its neighbours
        # Vertices 1, 2, 3, etc. are start/exit points beginning from the NW light north point
        # going clockwise. Light queues are represented as a tuple (Traffic light, direction)
        NW = ENV_CONSTANTS["LIGHT_POSITIONS"]["NW"]
        NE = ENV_CONSTANTS["LIGHT_POSITIONS"]["NE"]
        SE = ENV_CONSTANTS["LIGHT_POSITIONS"]["SE"]
        SW = ENV_CONSTANTS["LIGHT_POSITIONS"]["SW"]

        graph = {(NW, "s"): [8, (NE, "e"), (SW, "s")],
                 # A car heading west at NW can leave through exit 1 (north) or exit 8 (west).
                 # The first entry is the exit point 1, not the index of the NE light.
                 (NW, "w"): [1, 8, (SW, "s")],
                 (NW, "n"): [8, 1, (NE, "e")],
                 (NW, "e"): [1, (NE, "e"), (SW, "s")],
                 (NE, "s"): [3, (SE, "s"), (NW, "w")],
                 (NE, "w"): [2, (NW, "w"), (SE, "s")],
                 (NE, "n"): [3, 2, (NW, "w")],
                 (NE, "e"): [2, 3, (SE, "s")],
                 (SE, "s"): [4, 5, (SW, "w")],
                 (SE, "w"): [5, (NE, "n"), (SW, "w")],
                 (SE, "n"): [4, (NE, "n"), (SW, "w")],
                 (SE, "e"): [5, 4, (NE, "n")],
                 (SW, "s"): [7, 6, (SE, "e")],
                 (SW, "w"): [6, 7, (NW, "n")],
                 (SW, "n"): [7, (NW, "n"), (SE, "e")],
                 (SW, "e"): [6, (SE, "e"), (NW, "n")],
                 1: [(NW, "s")],
                 2: [(NE, "s")],
                 3: [(NE, "w")],
                 4: [(SE, "w")],
                 5: [(SE, "n")],
                 6: [(SW, "n")],
                 7: [(SW, "e")],
                 8: [(NW, "e")]}

        # Shortest point from point a to point b can be found with BFS
        # "Start points" are represented as ints in the graph whereas queues are represented as tuples
        entryPoints = sorted(node for node in graph if type(node) is int)

        # A BFS path has the form [start, (light, dir), ..., (light, dir), end]. Each one is
        # turned into a route whose first element is the tuple (starting light, direction)
        # and whose subsequent elements are actions.
        # e.g. [(0, 2), "s", "s"] ==> starting at NW light's queue facing south, go south, go south
        dirs = ENV_CONSTANTS["QUEUE_DIR"]
        allRoutes = []
        for start in entryPoints:
            for end in entryPoints:
                if end == start:
                    continue
                path = BFS(graph, start, end)
                startLight, startHeading = path[1]  # the first queue the car joins
                newRoute = [(startLight, dirs[startHeading])]
                newRoute.extend(queue[1] for queue in path[2:-1])  # heading of each later queue is the action
                newRoute.append(getExitAction(path[-1]))
                allRoutes.append(newRoute)

        return allRoutes

    def __str__(self):
        ''' Text sketch of the grid showing the number of cars in every queue of every light '''
        NW  = ENV_CONSTANTS["LIGHT_POSITIONS"]["NW"]
        NE  = ENV_CONSTANTS["LIGHT_POSITIONS"]["NE"]
        SE  = ENV_CONSTANTS["LIGHT_POSITIONS"]["SE"]
        SW  = ENV_CONSTANTS["LIGHT_POSITIONS"]["SW"]
        n   = ENV_CONSTANTS["QUEUE_DIR"]["n"]
        e   = ENV_CONSTANTS["QUEUE_DIR"]["e"]
        s   = ENV_CONSTANTS["QUEUE_DIR"]["s"]
        w   = ENV_CONSTANTS["QUEUE_DIR"]["w"]

        nw = self.lights[NW]
        ne = self.lights[NE]
        sw = self.lights[SW]
        se = self.lights[SE]

        def count(light, direction):
            return light.queues[direction].getNumCars()

        to_str = "\t   {} \t\t  {}\n".format(count(nw, s), count(ne, s))
        # Right-hand column reports the NE light's own west queue
        to_str += "\t {} NW {}\t\t{} NE {}\n".format(
            count(nw, e), count(nw, w), count(ne, e), count(ne, w))
        to_str += "\t   {}\t\t  {}\n".format(count(nw, n), count(ne, n))
        to_str += "\n\n"
        to_str += "\t   {} \t\t  {}\n".format(count(sw, s), count(se, s))
        # Right-hand column reports the SE light's own west queue
        to_str += "\t {} SW {}\t\t{} SE {}\n".format(
            count(sw, e), count(sw, w), count(se, e), count(se, w))
        to_str += "\t   {} \t\t  {}".format(count(sw, n), count(se, n))
        return to_str


In [8]:
class Main():
    '''
    Drives the simulation: asks the user where to save / load the Q-table,
    builds the Agent and runs the training loop.
    '''

    def __init__(self, visualizerCallback=None):
        self.environment = Environment(0)
        # Optional hook called after every tick with
        # (environment, time, False, rolling average cost, isRushHour)
        self.visualizerCallback = visualizerCallback

    def _askYesNo(self, prompt):
        ''' Keep prompting until the user answers yes or no; returns True for yes '''
        while True:
            response = input(prompt).strip().lower()
            if response == "yes":
                return True
            if response == "no":
                return False
            print("Invalid input. Please enter 'yes' or 'no'.")

    def startSimulation(self, route=None):
        '''
        Interactive entry point. Confirms the save file, asks whether to continue
        from the load file, then trains. Raises SystemExit when the user declines
        to save. Returns (cost history, car-count history, average travel times).
        '''
        saveFileName = FILES["SAVE_FILE"]
        if self._askYesNo("Save qTable to SAVE_FILE: {}? (yes/no) ".format(saveFileName)):
            print("Continuing with save...")
        else:
            print("Aborting simulation. Change SAVE_FILE in config.py")
            # SystemExit instead of the interactive exit() helper, which is not
            # guaranteed to exist and shuts the kernel down inside a notebook
            raise SystemExit

        continueTraining = self._askYesNo("Continue training from LOAD_FILE: {}? (yes/no) ".format(FILES["LOAD_FILE"]))
        if not continueTraining:
            print("Training from scratch")

        self.agent = Agent(self.environment, continueTraining=continueTraining)
        saveFileFunction = lambda: self.saveQTable(self.agent.qTable, saveFileName)
        rewardHistory, carsHistory, avgTravelTimes = self.runSimulation(self.agent, True, saveFile=saveFileFunction)
        return rewardHistory, carsHistory, avgTravelTimes

    def culminativeCO2(self, travelTimes):
        ''' Running total of CO2 over the given travel times (CO2_PER_TICK per tick) '''
        return np.cumsum([x * CAR_CONSTS["CO2_PER_TICK"] for x in travelTimes])

    def runSimulation(self, agent,resetOnDay=True, loadFile=None, saveFile=None,naive=False,learn=True):
        '''
        Run NUM_YEARS * NUM_DAYS episodes of EPISODE_LENGTH ticks each.

        :agent: the agent that picks actions and, when learn is True, is trained
        :saveFile: optional zero-argument callable invoked after every year when learn is True
        :learn: when False the Q-table is not updated and nothing is saved
        resetOnDay, loadFile and naive are accepted but not used here.
        Returns (cost of every tick, number of cars after every tick,
                 average travel time of each day).
        '''
        routes = self.environment.generateRoutes()

        #========================================================================#
        #                       ~   START SIMULATION   ~                         #
        #========================================================================#
        # Track states of the agent that is actually being run
        stateTracker = set(agent.qTable.keys())
        
        carsHistory = []
        waitTimeList = []
        avgTravelTimes = [] # average travel times by separated day
        print("Starting simulation. Num epochs {}".format(ENV_CONSTANTS["NUM_YEARS"]*ENV_CONSTANTS["NUM_DAYS"]))
        avg = 0
        for year in range(ENV_CONSTANTS["NUM_YEARS"]):
            sumWaitTime = 0
            yearHistory = []
            for day in range(ENV_CONSTANTS["NUM_DAYS"]):
                # Every day starts from a fresh, empty environment
                self.environment = Environment(0)
                agent.environment = self.environment
                dayTravels = []
                for time in range(ENV_CONSTANTS["EPISODE_LENGTH"]):
                    # Steps
                    #   1) Read in state
                    #   2) make a decision
                    #   3) observe reward -> environment.update()
                    #   4) update q table for old state using new reward.
                    state = self.environment.toState(time)
                    action = agent.updateLights(time)
                    waitTimes, travels = self.environment.update(time,routes)
                    sumWaitTime += waitTimes
                    waitTimeList.append(waitTimes)
                    dayTravels += travels
                    newState = self.environment.toState(time+1)
                    if learn: _ = agent.updateQTable(state,newState,action,waitTime=waitTimes)
                    stateTracker.add(str(state))
                    yearHistory.append(waitTimes)
                    carsHistory.append(self.environment.getNumCars())
                    
                    avg = self.approxRollingAvg(avg, waitTimes)
                    if self.visualizerCallback is not None:
                        self.visualizerCallback(self.environment, time, False, avg, self.environment.isRushHour)
                dayTravels += self.environment.getCarTravelDuration(ENV_CONSTANTS["EPISODE_LENGTH"]) # get the rest of the waits in the environment
                # Add average of the day's travels; a day without any car counts as 0
                avgTravelTimes.append(sum(dayTravels)/len(dayTravels) if dayTravels else 0.0)
            yearHistory = np.array(yearHistory)
            print("Finished year {},  \tavg cost: {:.4f}".format(year+1,np.mean(yearHistory)))
            percVisited = (len(stateTracker)/agent.numStates)*100
            print("\t-> states visisted: {}, % visited: {:.4f}%".format(len(stateTracker),percVisited))
            if learn and saveFile is not None:
                saveFile()
        return waitTimeList, carsHistory, avgTravelTimes

    def saveQTable(self, qTable,filePath):
        '''
        Write the Q-table to filePath as indented JSON, overwriting any existing
        file and creating the parent directory when the path has one.
        '''
        directory = os.path.dirname(filePath)
        if directory:
            # A bare file name has no directory part and makedirs("") would fail
            os.makedirs(directory, exist_ok=True)
        # Serialise first so a failure here cannot leave a truncated file behind
        serialized = json.dumps(qTable,sort_keys=True, indent=4,cls=NumpyEncoder)
        with open(filePath,"w") as outFile:
            outFile.write(serialized)
        print("\t-> model saved to {}".format(filePath))

    def approxRollingAvg(self, avg, newCost):
        ''' Approximate rolling average over a window of 30 samples '''
        N = 30
        avg -= avg / N
        avg += newCost / N
        return avg

    



class NumpyEncoder(json.JSONEncoder):
    ''' JSON encoder that writes numpy arrays as (nested) lists '''

    def default(self, obj):
        # Anything that is not an ndarray is left to the base encoder
        if not isinstance(obj, np.ndarray):
            return super().default(obj)
        return obj.tolist()

# Entry point: build the simulation driver and start the interactive run
# (startSimulation asks its yes/no questions on standard input).
if __name__ == "__main__":
    main = Main()
    main.startSimulation()

Continuing with save...
Loaded qTable from qTables/egreedy_normal.json
Initial qTable length: 1904
Starting simulation. Num epochs 10
Finished year 1,  	avg cost: 1.5853
	-> states visisted: 2215, % visited: 2.1100%
	-> model saved to qTables/egreedy_normal.json
