In [33]:
# Change notebook width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:70% !important; }</style>"))

In [38]:
from dataclasses import dataclass
from typing import Tuple, Dict, List, Optional, Iterable, Mapping, Sequence, Callable, Iterator
from rl.markov_decision_process import *
from rl.markov_process import *
from rl.distribution import *
from rl.dynamic_programming import *
import pprint as pp
import numpy as np
import itertools
import time

# Problem 2

## States and Actions

In [39]:
#State is a tuple (t,S,W,I)
@dataclass(frozen=True)
class State:
    t: float # Time step
    S: float # Order Book Mid Price at time t
    W: float # PnL - Prices and Losses at time t.  PnL = Value Today - Value from Prior Day 
    I: float # Inventory (can be + or -) at time t



#An action is a Tuple(bid,ask)
@dataclass(frozen=True)    
class Action:
    Pb: float # Bid Price at time t
    Pa: float # Ask Price at time t

## Functions

In [40]:
# Utility Equation Assumed from Book pg 312
# Given an an input and gamma, gets the utility
def utility(x, γ) -> float:
    return -np.exp(-γ*x)

# Calculates the market maker's Bid Spread (relative to the order book mid)
def calc_δ_b(state, T, γ, σ, k):
    return (.5 * (2*state.I + 1) * γ * σ**2 * (T-state.t)) + ((1/γ) * np.log(1 + γ/k))

# Calculates the market maker's Ask Spread (relative to the order book mid)
def calc_δ_a(state, T, γ, σ, k):
    return (.5 * (1 - 2*state.I) * γ * σ**2 * (T-state.t)) + ((1/γ) * np.log(1 + γ/k))

# Returns an array of rewards, inventory, bids, and asks
def process_dat_trace(trace, num_traces):
    # Initialize empty arrays
    reward_arr = []
    inventory_arr = []
    bid_arr = []
    ask_arr = []
    
    # Loop over each trace
    for count, simulation in enumerate(trace):
        if count > num_traces:
            break
            
        # Get the last item from the simulation
        for item in simulation:
            reward = item.reward
            inventory = item.state.I
            bid = item.action.Pb
            ask = item.action.Pa
            
        # Append values to arrays
        reward_arr.append(reward)
        inventory_arr.append(inventory)
        bid_arr.append(bid)
        ask_arr.append(ask)
        
    return reward_arr, inventory_arr, bid_arr, ask_arr

## Policies

In [41]:
# Naive Policy with a constant Bid-Ask Spread = Average of the Optimal Policy
# The Naive Policy is always symmetric around the Mid Price
class NaivePolicy(Policy[State,Action]):
    T: float
    γ: float
    σ: float
    k: float
    spread:float
    
    def __init__(self, Τ, γ, σ, k, spread):
        self.T = T
        self.γ = γ
        self.σ = σ
        self.k = k
        self.spread = spread
    def act(self, state: State) -> Optional[Distribution[Action]]:
        Pb: float = state.S - self.spread/2
        Pa: float = state.S + self.spread/2
        return Constant(Action(Pb, Pa))


    
# Optimal Policy Derived from class Symmetric around the Indifferent Price
class OptimalPolicy(Policy[State, Action]):
    T: float
    γ: float
    σ: float
    k: float
    
    def __init__(self, Τ, γ, σ, k):
        self.T = T
        self.γ = γ
        self.σ = σ
        self.k = k
        
    def act(self, state: State) -> Optional[Distribution[Action]]:
        δ_b: float = calc_δ_b(state, self.T, self.γ, self.σ, self.k)
        δ_a: float = calc_δ_a(state, self.T, self.γ, self.σ, self.k)
        Pb:  float = state.S - δ_b
        Pa:  float = state.S + δ_a
        return Constant(Action(Pb, Pa))

## MDP

In [42]:
class Market_Making_Problem_MDP(MarkovDecisionProcess[State,Action]):    
    T:  float
    δt: float
    γ:  float
    σ:  float
    k:  float
    c:  float
    naive_spread: float
        
    def __init__(self, T, δt, γ, σ, k, c, naive_spread):
        self.T:  float = T
        self.δt: float = δt
        self.γ:  float = γ
        self.σ:  float = σ
        self.k:  float = k
        self.c:  float = c
        self.naive_spread = naive_spread
        
    def actions(self, state: State) -> Iterable[Action]:
        action_list: Iterable[Action] = [] # Define empty list
            
        # Actions for Naive Policy
        Pb:float = state.S - self.naive_spread/2
        Pa:float = state.S + self.naive_spread/2
        action_list.append(Action(Pb, Pa))
        
        # Actions for Optimal Policy
        δ_b: float = calc_δ_b(state, self.T, self.γ, self.σ, self.k)
        δ_a: float = calc_δ_a(state, self.T, self.γ, self.σ, self.k)
        Pb:  float = state.S - δ_b
        Pa:  float = state.S + δ_a
        action_list.append(Action(Pb, Pa))
        
        return list_actions
    
    def step(
        self,
        state: State,
        action: Action
                                ) -> Optional[Distribution[Tuple[State, float]]]:
        if state.t > self.T:
            return None  
        
        def my_mega_sampler() -> Tuple[State, float]:
            # Setup basic variables
            PnL = state.W
            Inventory = state.I
            
            # Get Market Maker's Bid and Ask Spread
            δb = state.S - action.Pb
            δa = action.Pa - state.S
            
            # Get probabilities of Incrementing and Decrementing Inventory
            prob_inventory_increment: float = self.c * self.δt * np.exp(-self.k*δb)
            prob_inventory_decrement: float = self.c * self.δt * np.exp(-self.k*δa)
            
            # Increment or Decrement Inventory based on probabilities given
            if Inventory >= 1 and np.random.random() < prob_inventory_decrement:
                PnL += action.Pa
                Inventory -=1
            if np.random.random() < prob_inventory_increment:
                PnL -= action.Pb
                Inventory +=1       
                
            # Update Order Book Mid Price
            order_book_mid_price = state.S
            if np.random.random() < 0.5:
                order_book_mid_price += self.σ*np.sqrt(self.δt)
            else:
                order_book_mid_price -= self.σ*np.sqrt(self.δt)
            
            # Set next time step and Next State
            next_t = state.t + self.δt
            next_state = State(t=next_t, S=order_book_mid_price, W=PnL, I=Inventory)
                               
            # Get Reward depending on if the simulation is completed
            if next_state.t >= self.T:
                reward = utility(next_state.W + next_state.I*next_state.S, self.γ)
            else:
                reward = 0
            
            return (next_state, reward)
        
        return SampledDistribution(sampler=my_mega_sampler)

## Solve

In [43]:
# Set Initial Variables
S = 100
T = 1
δt = 0.005
γ = 0.1
σ = 2
I = 0
k = 1.5
c = 140
num_traces = 1000

# Set start state and distribution
t = 0
S = S
W = 0
I = I
start_state = State(t, S, W, I)        
start_state_distribution = Constant(start_state)

# Setup Optimal Model and Policy
optimal_model = Market_Making_Problem_MDP(T, δt, γ, σ, I, k, c)
optimal_policy = OptimalPolicy(T, γ, σ, k)

# Get Optimal Trace and Simulation Arrays
optimal_traces = naive_model.action_traces(start_state_distribution, optimal_policy)
optimal_reward, optimal_inventory, optimal_bids, optimal_asks = process_dat_trace(optimal_traces, num_traces)

# Set Naive Spread as average of bids and asks from optimal trace
naive_spread = np.mean(optimal_bids + optimal_asks)

# Setup Naive Model and Policy
naive_model = Market_Making_Problem_MDP(T, δt, γ, σ, I, k, c)
naive_policy =  NaivePolicy(T, γ, σ, k, naive_spread)

# Get Naive Trace and Simulation Arrays
naive_traces = new_model.action_traces(start_state_distribution, naive_policy)
naive_reward, naive_inventory, naive_bids, naive_asks = process_dat_trace(naive_traces, num_traces)

print("Naive")
print("Reward:    \t", np.mean(naive_reward))
print("Inventory: \t", np.mean(naive_inventory))
print("Bid Price: \t", np.mean(naive_bids))
print("Ask Price: \t", np.mean(naive_asks))
print()
print("Optimal")
print("Reward:    \t", np.mean(optimal_reward))
print("Inventory: \t", np.mean(optimal_inventory))
print("Bid Price: \t", np.mean(optimal_bids))
print("Ask Price: \t", np.mean(optimal_asks))

Naive
Reward:    	 -1.0
Inventory: 	 0.0
Bid Price: 	 50.002988621044395
Ask Price: 	 149.94812847260386

Optimal
Reward:    	 -0.8826705703552483
Inventory: 	 0.9400599400599401
Bid Price: 	 99.29875464018373
Ask Price: 	 100.59152506293516
