In [255]:
'''
    Given constraints around track and train costs, population, train capacity, etc...
    try to maximize cash in an episode. Cash is received after a passenger disembarks 
    at their destination (to prevent simply loading them all on trains).
    
    Tracks are assumed to be circular, and trains will move from their current station, to the track,
    to the end station, one location per tick. Consequently tracks may support up to 4 trains
    
    ex:
      ->
    A    B
      <-
      
    Passengers will spend 1 tick at each location, before queueing at the station.
    
    Trains will take the first n passengers from the queue when they are at the station. Loading 
    and unloading take place in one tick.
    
    Actions available are build_track, and build_train.
    
    Populations may change over time.
'''


class person:
    def __init__(self, home, work, g):
        self.home = home
        self.work = work
        self.location = home
        self.destination = work
        self.should_move = False
        self.queued = False
        self.boarded=False
        self.train = None
        
    def action(self):
        if not self.should_move:
            self.should_move = True
            if self.location == self.home:
                self.destination = self.work
            else:
                self.destination = self.home
        elif self.location == "->" or self.location == "<-":
            pass
        elif not self.queued:
            g.queue(self, self.location)
            self.queued = True
        elif self.queued:
            for t in g.trains:
                if t.boardable and t.location == self.location and t.destination == self.destination:
                    t.board(self)
                    self.boarded = True
                    self.queued = False
                break
        elif self.boarded:
            self.location = self.train.location
            if self.location == self.destination:
                self.train.disembark(self)

class train:
    def __init__(self, start, end, capacity, gym):
        self.location = start
        self.capacity = capacity
        self.track = [start, "->", end, "<-"]
        self.occupancy = 0
        self.occupants=[]
        self.g = gym
        self.location_idx = 0
        self.boardable = True
        self.destination = end
        self.start = start
        self.end = end

    def action(self):
        # TODO Only move if people on train or waiting at destination
        waiting = sum(1 for i in g._pop if (i.location == self.destination) and i.queued)
        if self.occupancy > 0 and waiting > 0:
            l = len(self.track)
            i = self.location_idx +1
            i = i % l
            self.location_idx = i
            self.location = self.track[self.location_idx]
            if self.location == self.start:
                self.destination = self.end
            elif self.location == self.end:
                self.destination = self.start
    
    def board(self, p):
        if self.boardable:
            self.occupants.append(p)
            p.train=self
            self.occupancy = len(self.occupants)
            self.boardable = self.occupancy < self.capacity
            self.g.station_queues[0].remove(p)
    
    def disembark(self, p):
        self.occupants.remove(p)
        self.occupancy = len(self.occupants)
        self.boardable = self.occupancy < self.capacity
        p.train = None
        
class gym:
    def __init__(self):
        self.train_cost = 1
        self.track_cost = 1
        self.stations = ["A", "B"]
        self.station_queues = [[],[]]
        self.tracks = []
        self.trains = []
        self.population = [50,50]
        self.cash = 50
        self.train_capacity = 10
        self.fare = 1
        self.track_maintenance = 1
        
        self._pop=[]
        idx=0
        for p in self.population:
            for i in range(0,p):
                new_person = person(self.stations[idx], self.stations[idx-1], self)
                self._pop.append(new_person)
            idx +=1
                
    
    def queue(self, person, location):
        self.station_queues[0].append(person)
        
    def step(self):
        for p in self._pop:
            p.action()
        for t in self.trains:
            t.action()

    def build_train(self, start, end, capacity):
        self.trains.append(train(start, end, capacity, self))
        self.cash -= self.train_cost
    def build_track(self, start, end):
        self.tracks.append(""+start+end)
        self.cash-= self.track_cost

In [256]:
g = gym()
g.build_train("A", "B", 20)
for i in range(0,5):
    g.step()
    print("train at:", g.trains[0].location)
    print("train occupants:", len(g.trains[0].occupants))
    print("waiting:", len(g.station_queues[0]))
    a = sum(1 for i in g._pop if i.location == "A")
    b = sum(1 for i in g._pop if i.location == "B")
    print("At A:", a)
    print("At B:", b)

train at: A
train occupants: 0
waiting: 0
At A: 50
At B: 50
train at: A
train occupants: 0
waiting: 100
At A: 50
At B: 50
train at: ->
train occupants: 20
waiting: 80
At A: 50
At B: 50
train at: B
train occupants: 20
waiting: 100
At A: 50
At B: 50
train at: <-
train occupants: 20
waiting: 100
At A: 50
At B: 50
