# Optimization project Iver & Alessandro 

In [9]:
import gurobipy as gp
from gurobipy import GRB
from enum import Enum
import numpy as np

## Taxi notes

Grid with coordinates 1-25

| 01 | 02 | 03 | 04 | 05 |
| 06 | 07 | 08 | 09 | 10 | etc.

Possible states:
Taxi location 1-25
Taxi empty or not

Possible actions:
N (-5), S (+5), E (+1), W (-1)
Pick up, drop off

Rewards:
-1 per time step
+20 for successfull drop off
-10 for trying to drop off when it cant

Initial state: Taxi at pos 1


Goal:

find a policy $\mu$ that for each state $s$ chooses the optimal action $a$ such that

$$
\mu (s, a) = argmax Q(s, a)
$$



## Linear Models

### 1.

maximize -#steps + 20 * successfull_dropoff - 10 * failed_passenger_operation
s.t.


In [134]:
MAP_SIZE = 5

class Tile(Enum):
    EMPTY = 0
    BUILDING = 1
    PICK_UP = 2
    DROP_OFF = 3

    def __eq__(self, other):
        if  other.__class__ is self.__class__:
            return self.value == other.value
        elif self.value == other:
            return True
        else:
            return False


# Remember to index as [y, x]
mappy = np.array(
[
    [0, 1, 0, 0, 0,],
    [0, 0, 0, 1, 0,],
    [0, 1, 0, 1, 3,],
    [0, 1, 0, 0, 0,],
    [2, 1, 1, 0, 0,]
])

mappy = mappy.T

In [135]:
class Action(Enum):
    MOVE_SOUTH = 1
    MOVE_NORTH = 2
    MOVE_EAST = 3
    MOVE_WEST = 4
    PICK_UP = 5
    DROP_OFF = 6


class FaultyPassengerAction(Exception):
    pass

class FaultyMove(Exception):
    pass

def do_action(x, y, passenger, action) -> tuple[int, int, bool, Exception | None]:
    if action == Action.PICK_UP:
        if passenger or mappy[y, x] != Tile.PICK_UP:
            return x, y, passenger, FaultyPassengerAction
        return x, y, True, None
    elif action == Action.DROP_OFF:
        if not passenger or mappy[y, x] != Tile.DROP_OFF:
            return x, y, passenger, FaultyPassengerAction
        return x, y, False, None

    nx, ny = x, y
    if action == Action.MOVE_NORTH:
        ny -= 1
    elif action == Action.MOVE_SOUTH:
        ny += 1
    elif action == Action.MOVE_EAST:
        nx += 1
    elif action == Action.MOVE_WEST:
        nx -= 1

    if nx < 0 or ny < 0 or nx >= MAP_SIZE or ny >= MAP_SIZE:
        return x, y, passenger, FaultyMove
    elif mappy[ny, nx] == Tile.BUILDING:
        return x, y, passenger, FaultyMove
    else:
        return nx, ny, passenger, None

def get_idx(x: int, y: int, passenger: int | bool) -> int:
    return x + y * MAP_SIZE + int(passenger) * MAP_SIZE * MAP_SIZE

def get_state(idx: int) -> tuple[int, int, bool]:
    passenger = idx // (MAP_SIZE * MAP_SIZE)
    rest = idx % (MAP_SIZE * MAP_SIZE)
    y = rest // MAP_SIZE
    x = idx % MAP_SIZE
    return x, y, bool(passenger)

_tst_state = 2, 3, True
_tst_state_2 = get_state(get_idx(_tst_state[0], _tst_state[1], _tst_state[2]))
assert _tst_state_2 == _tst_state

def iterstates():
    for x in range(MAP_SIZE):
        for y in range(MAP_SIZE):
            if mappy[y, x] == Tile.BUILDING:
                # Skip all buildings
                continue
            for passenger in [0, 1]:
                yield x, y, passenger

In [148]:
m = gp.Model("taxi_driver")
# m.params.LogToConsole = 0

# Vs[y, x, passenger] => Value of state
Vs = np.empty((MAP_SIZE * MAP_SIZE * 2), dtype=object)
# choices[y, x, p, ny, nx, np] => probability of transition
rewards = np.zeros((MAP_SIZE * MAP_SIZE * 2, MAP_SIZE * MAP_SIZE * 2))
gamma = 0.99

for x, y, passenger in iterstates():
    # Insert a state
    Vs[get_idx(x, y, passenger)] = m.addVar()
for x, y, passenger in iterstates():
    # Create list of all transitions away from this given state
    state_actions = []
    for action in Action:
        reward = -1  # For using a timestep

        nx, ny, npass, error = do_action(x, y, passenger, action)
        if error == FaultyPassengerAction:
            reward -= 10
        elif action == Action.DROP_OFF:
            # Successfull drop off
            reward += 20
        elif error == FaultyMove:
            continue  # Skip all impossible moves

        rewards[get_idx(x, y, passenger), get_idx(nx, ny, npass)] = reward
        m.addConstr(Vs[get_idx(x, y, passenger)] >= reward + gamma * Vs[get_idx(nx, ny, npass)])

m.setObjective(
    (1 - gamma) * gp.quicksum(Vs[Vs != None]),
    sense=GRB.MINIMIZE,
)

m.optimize()

print()
for passenger in [0, 1]:
    print("With passenger:") if passenger else print("No passenger:")
    for x in range(MAP_SIZE):
        for y in range(MAP_SIZE):
            if mappy[y, x] == Tile.BUILDING:
                print(f"  B  ", end=" ")
            else:
                print(f"{Vs[get_idx(x, y, passenger)].X:05.2f}", end=" ")
        print()
    print()

# x, y, png = 0, 3, False

# visited = set((x, y, png))
# while True:
#     todo = None
#     for i in range(len(choices[get_idx(x, y, png),:])):
#         ch = choices[get_idx(x, y, png), i]
#         if type(ch) == gp.Var:
#             if ch.X != 0 and ch.X != 1:
#                 print(f"at {x, y, png, i} we have undetermined choice")
#             if ch.X == 1:
#                 todo = i
#                 break
#     x, y, png = get_state(todo)
#     print(x, y, png)
#     if (x, y, png) in visited:
#         break
#     visited.add((x, y, png))

if False:
    for i in range(choices.shape[1]):
        for j in range(choices.shape[0]):
            ch = choices[j, i]
            if type(ch) == gp.Var:
                print(ch.X, end=" ")
            else:
                print(0.0, end=" ")
        print()

Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (win64)

CPU model: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 148 rows, 36 columns and 226 nonzeros
Model fingerprint: 0xc1ba4351
Coefficient statistics:
  Matrix range     [1e-02, 1e+00]
  Objective range  [1e-02, 1e-02]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+00, 2e+01]
Presolve removed 70 rows and 0 columns
Presolve time: 0.00s
Presolved: 78 rows, 36 columns, 156 nonzeros

Iteration    Objective       Primal Inf.    Dual Inf.      Time
       0    5.3405458e-01   4.988081e+01   0.000000e+00      0s
      27    2.7806593e+00   0.000000e+00   0.000000e+00      0s

Solved in 27 iterations and 0.01 seconds (0.00 work units)
Optimal objective  2.780659288e+00

No passenger:
02.35   B   00.31 00.00 00.00 
03.38 02.35 01.32   B   00.00 
04.43   B   00.31   B   00.00 
05.48   B   00.0

In [145]:
"""
Assumptions:
  - Every draw is independent of the previous draws,
    meaning the probability of drawing 5 (or 10 000) of the same card is nonzero.
  - The player has a fixed policy of always drawing until his score is 12 or higher,
    because below that it is impossible to bust.

"""
class BJAction(Enum):
    Hit = 0
    Stand = 1

def draw_card():
    rn = np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
    if rn in [11, 12, 13]: # Face cards
        rn = 10
    return rn

POSSIBLE_SUMS = 10 # [12, ..., 21]
POSSIBLE_DEALER_FACES = 10 # [1, ..., 10]

m2 = gp.Model("blackjack_bajillionaire")

Vs = np.empty((POSSIBLE_SUMS * POSSIBLE_DEALER_FACES), dtype=object)


