# In [1]:
# !pip install tensorflow

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, LSTM, concatenate
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
from scipy.stats import poisson

# In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, concatenate
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
from scipy.stats import poisson

class SafeAlphaAvellanedaStoikov:
    """Avellaneda-Stoikov style market maker steered by a small DQN agent.

    On every ``step`` the agent picks one ``(risk aversion, skew)`` pair from
    ``action_space``; the pair drives the bid/ask quotes, fills are simulated
    with Poisson arrivals whose intensity decays exponentially with the
    distance of the quote from the mid price, and the resulting change in
    mark-to-market PnL (minus an inventory penalty, plus a small
    tight-spread bonus) is used as the reinforcement-learning reward.
    """

    # Number of steps between two verbose progress lines.
    LOG_EVERY_STEPS = 100

    def __init__(self, gamma=0.1, sigma=0.05, T=1.0, k=150, M=0.02, dt=0.005,
                 max_inventory=5, price_range=0.1, max_spread_pct=0.05,
                 verbose=False, memory_size=10000, batch_size=32):
        """
        Parameters:
        - gamma: Initial risk aversion (clamped to at least 0.0001)
        - sigma: Volatility estimate (clamped to at least 0.01)
        - T: Total time horizon
        - k: Liquidity parameter (clamped to at least 10)
        - M: Maximum order size (clamped to at least 0.001)
        - dt: Time step (must be positive)
        - max_inventory: Maximum position size (long/short)
        - price_range: Price bounds as percentage from mid price (0.1 = 10%)
        - max_spread_pct: Maximum spread as percentage of mid price
        - verbose: Print debug information
        - memory_size: Capacity of the experience replay buffer
        - batch_size: Number of experiences sampled per training pass

        Raises:
        - ValueError: if ``dt`` is not strictly positive
        """
        if dt <= 0:
            raise ValueError(f"dt must be positive, got {dt!r}")

        # Core parameters
        self.gamma = max(0.0001, gamma)
        self.sigma = max(0.01, sigma)
        self.T = T
        self.k = max(10, k)
        self.M = max(0.001, M)
        self.dt = dt

        # Safety limits
        self.max_inventory = max(1, max_inventory)
        self.price_range = price_range
        self.max_spread_pct = max_spread_pct

        # RL parameters: each action is a (risk aversion, skew) pair
        self.action_space = [
            (0.01, 0),    # Low risk, no skew
            (0.1, -0.02), # Mild risk, slight negative skew
            (0.1, 0.02),  # Mild risk, slight positive skew
            (0.2, 0),     # Moderate risk, no skew
            (0.5, 0)      # High risk, no skew
        ]
        self.n_actions = len(self.action_space)
        self.memory = deque(maxlen=memory_size)
        self.batch_size = batch_size
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999
        self.gamma_dqn = 0.95

        # State tracking
        self.mid_prices = deque(maxlen=100)
        self.inventory_history = deque(maxlen=10)
        # Use the clamped values so derived quantities agree with the
        # attributes the rest of the class reads.
        self.current_gamma = self.gamma
        self.current_skew = 0
        self.verbose = verbose
        self.A = np.exp(self.k * self.M / 2) / self.dt

        # Sync the target network every tenth of the horizon. Counting steps
        # avoids float-modulo artefacts on the accumulated clock.
        self.target_update_steps = max(1, int(round(self.T / 10 / self.dt)))

        # Initialize neural network
        self._build_model()
        self.reset()

    def _build_network(self):
        """Return a fresh, uncompiled Q-network with its own weights."""
        state_input = Input(shape=(3,))  # inventory, price_change, spread
        hidden = Dense(64, activation='relu')(state_input)
        hidden = Dense(64, activation='relu')(hidden)
        q_values = Dense(self.n_actions, activation='linear')(hidden)
        return Model(inputs=state_input, outputs=q_values)

    def _build_model(self):
        """Create the online and target Q-networks.

        The two networks must be built from separate layer instances:
        wrapping the same output tensor in two ``Model`` objects would make
        them share weights, so the target would never lag the online network.
        """
        self.model = self._build_network()
        self.target_model = self._build_network()
        self.model.compile(optimizer=Adam(0.001), loss='mse')
        self.update_target_model()

    def reset(self):
        """Clear position, clock, quotes and recorded history.

        The networks, replay memory and exploration rate are left untouched.
        """
        self.inventory = 0
        self.cash = 0
        self.current_time = 0
        self.step_count = 0
        self.last_pnl = 0
        # No quotes exist until the first step computes them.
        self.bid_price = None
        self.ask_price = None
        self.mid_prices.clear()
        self.inventory_history.clear()

        self.history = {
            'mid_price': [],
            'bid_price': [],
            'ask_price': [],
            'inventory': [],
            'pnl': [],
            'gamma': [],
            'skew': []
        }

    def get_state(self):
        """Return the 3-element float state vector fed to the Q-network.

        Components: average recent inventory relative to ``max_inventory``,
        last mid-price change in percent, and current quoted spread in
        percent of the last mid price. All zeros until two mid prices have
        been observed; the spread component is 0 while no quotes exist.
        """
        if len(self.mid_prices) < 2:
            return np.zeros(3)

        previous_mid = self.mid_prices[-2]
        latest_mid = self.mid_prices[-1]
        price_change = (latest_mid - previous_mid) / previous_mid
        avg_inventory = np.mean(self.inventory_history) if self.inventory_history else 0

        if self.bid_price is None or self.ask_price is None:
            current_spread = 0.0
        else:
            current_spread = (self.ask_price - self.bid_price) / latest_mid

        return np.array([
            avg_inventory / self.max_inventory,
            price_change * 100,   # percentage
            current_spread * 100  # percentage
        ], dtype=float)

    def calculate_optimal_spreads(self, mid_price):
        """Compute the bid and ask quotes around ``mid_price``.

        Uses the current risk aversion and skew, the remaining time to the
        horizon and the current inventory, then clamps the quotes to the
        configured price band and maximum spread.

        Returns:
        - (bid_price, ask_price)
        """
        # Floor the remaining time so the volatility term never vanishes
        # (or turns negative) once the horizon has been passed.
        time_left = max(0.001, self.T - self.current_time)

        # Base spread components
        spread_vol = 0.5 * self.current_gamma * self.sigma**2 * time_left
        spread_liq = (1/self.current_gamma) * np.log(1 + self.current_gamma/self.k)

        # Inventory adjustment: a long position widens the bid side and
        # tightens the ask side; tanh bounds it to 30% of spread_liq.
        inv_ratio = self.inventory / self.max_inventory
        inventory_adj = 0.3 * spread_liq * np.tanh(3 * inv_ratio)

        # Calculate raw deltas
        delta_b = spread_vol + inventory_adj + spread_liq
        delta_a = spread_vol - inventory_adj + spread_liq

        # Apply price limits
        min_bid = mid_price * (1 - self.price_range)
        max_ask = mid_price * (1 + self.price_range)
        max_spread = mid_price * self.max_spread_pct

        # Calculate final prices with skew (multiplicative shift of both quotes)
        skew_factor = 1 + self.current_skew
        bid_price = max(min_bid, (mid_price - delta_b) * skew_factor)
        ask_price = min(max_ask, (mid_price + delta_a) * skew_factor)

        # Enforce maximum spread by re-centring on the quote midpoint
        if (ask_price - bid_price) > max_spread:
            centre = (bid_price + ask_price)/2
            bid_price = max(min_bid, centre - max_spread/2)
            ask_price = min(max_ask, centre + max_spread/2)

        return bid_price, ask_price

    def calculate_reward(self, mid_price):
        """Return the reward for the latest step and update ``last_pnl``.

        reward = change in mark-to-market PnL
                 - inventory penalty (scaled by |PnL change|)
                 + small bonus for quoting a tight spread
        """
        current_pnl = self.cash + self.inventory * mid_price
        delta_pnl = current_pnl - self.last_pnl

        # Penalties and rewards
        inv_penalty = (self.inventory / self.max_inventory)**2 * 0.1 * abs(delta_pnl)
        spread_reward = 0.01 * (1 - (self.ask_price - self.bid_price)/mid_price)

        self.last_pnl = current_pnl
        return delta_pnl - inv_penalty + spread_reward

    def select_action(self, state):
        """Pick an action index epsilon-greedily from the online network."""
        if np.random.rand() <= self.epsilon:
            return random.randint(0, self.n_actions - 1)

        q_values = self.model.predict(state.reshape(1, -1), verbose=0)
        return int(np.argmax(q_values[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self):
        """Train the online network on one random minibatch.

        Does nothing until the buffer holds at least ``batch_size``
        transitions. Each training pass also decays ``epsilon``.
        """
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states = np.array([exp[0] for exp in minibatch])
        actions = np.array([exp[1] for exp in minibatch], dtype=int)
        rewards = np.array([exp[2] for exp in minibatch], dtype=float)
        next_states = np.array([exp[3] for exp in minibatch])
        dones = np.array([exp[4] for exp in minibatch], dtype=bool)

        # Predict Q-values
        targets = self.model.predict(states, verbose=0)
        next_q = self.target_model.predict(next_states, verbose=0)

        # Terminal transitions keep the raw reward; the rest bootstrap from
        # the target network's best next-state value.
        bootstrapped = rewards + self.gamma_dqn * next_q.max(axis=1)
        targets[np.arange(len(minibatch)), actions] = np.where(dones, rewards, bootstrapped)

        # Train the model
        self.model.fit(states, targets, verbose=0, epochs=1)

        # Decay exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def step(self, mid_price, ask_price, ask_qty, bid_price, bid_qty):
        """Advance the simulation by one market update.

        Parameters:
        - mid_price: Current market mid price
        - ask_price, ask_qty, bid_price, bid_qty: Top-of-book data; accepted
          for interface compatibility but not used by the current logic

        Returns:
        - (bid_executed, ask_executed): whether each quote was filled
        """
        # Update market data
        self.mid_prices.append(mid_price)
        self.current_time += self.dt
        self.step_count += 1

        # Select action
        state = self.get_state()
        action_idx = self.select_action(state)
        self.current_gamma, self.current_skew = self.action_space[action_idx]

        # Calculate quotes
        self.bid_price, self.ask_price = self.calculate_optimal_spreads(mid_price)

        # Simulate executions: intensity decays with distance from mid
        spread_b = max(0.0001, mid_price - self.bid_price)
        spread_a = max(0.0001, self.ask_price - mid_price)

        lambda_b = self.A * np.exp(-self.k * spread_b)
        lambda_a = self.A * np.exp(-self.k * spread_a)

        # Execute orders with inventory limits (both draws always happen,
        # bid first, so the random stream is independent of the position)
        bid_arrived = poisson.rvs(lambda_b * self.dt) > 0
        ask_arrived = poisson.rvs(lambda_a * self.dt) > 0
        bid_executed = bool(bid_arrived) and self.inventory < self.max_inventory
        ask_executed = bool(ask_arrived) and self.inventory > -self.max_inventory

        # Update positions
        if bid_executed:
            self.inventory += 1
            self.cash -= self.bid_price
        if ask_executed:
            self.inventory -= 1
            self.cash += self.ask_price

        self.inventory_history.append(self.inventory)

        # Calculate reward
        reward = self.calculate_reward(mid_price)
        next_state = self.get_state()
        done = self.current_time >= self.T

        # Store experience and train (replay is a no-op on a short buffer)
        self.remember(state, action_idx, reward, next_state, done)
        self.replay()

        # Update target network periodically
        if self.step_count % self.target_update_steps == 0:
            self.update_target_model()

        # Record history
        self.history['mid_price'].append(mid_price)
        self.history['bid_price'].append(self.bid_price)
        self.history['ask_price'].append(self.ask_price)
        self.history['inventory'].append(self.inventory)
        self.history['pnl'].append(self.cash + self.inventory * mid_price)
        self.history['gamma'].append(self.current_gamma)
        self.history['skew'].append(self.current_skew)

        # Log on a step counter: the float clock advances by dt and would
        # practically never be an exact multiple of 100.
        if self.verbose and self.step_count % self.LOG_EVERY_STEPS == 0:
            print(f"Time: {self.current_time:.1f} | "
                  f"Mid: {mid_price:.4f} | "
                  f"Bid/Ask: {self.bid_price:.4f}/{self.ask_price:.4f} | "
                  f"Inventory: {self.inventory} | "
                  f"PnL: {self.history['pnl'][-1]:.2f}")

        return bid_executed, ask_executed

    def get_performance_metrics(self):
        """Return the recorded per-step history as a DataFrame."""
        return pd.DataFrame(self.history)

# In [5]:
def load_order_book_data(data_dir):
    """Load every ``*.csv.gz`` file in ``data_dir`` into one DataFrame.

    Files are read in sorted filename order and concatenated with a fresh
    integer index. Only the metadata columns and the top 5 ask/bid levels
    (price and amount) are kept, in a fixed column order.

    Raises:
    - ValueError: if ``data_dir`` contains no ``.csv.gz`` file
    - KeyError: if a file lacks one of the expected columns
    """
    # The column selection is the same for every file, so build it once.
    keep_cols = ['timestamp', 'local_timestamp', 'exchange', 'symbol']
    for level in range(5):
        keep_cols += [f'asks[{level}].price', f'asks[{level}].amount',
                      f'bids[{level}].price', f'bids[{level}].amount']
    wanted = set(keep_cols)

    all_files = sorted(
        os.path.join(data_dir, name)
        for name in os.listdir(data_dir)
        if name.endswith(".csv.gz")
    )
    if not all_files:
        raise ValueError(f"No '.csv.gz' files found in {data_dir!r}")

    # A callable ``usecols`` drops unwanted columns at parse time without
    # complaining about missing ones; the explicit ``[keep_cols]`` then fixes
    # the column order and raises KeyError if an expected column is absent.
    frames = [
        pd.read_csv(path, usecols=lambda col: col in wanted)[keep_cols]
        for path in all_files
    ]
    return pd.concat(frames, ignore_index=True)

# In [None]:
import os
import pandas as pd
import numpy as np
# from alpha_avellaneda_stoikov import AlphaAvellanedaStoikov

def load_and_preprocess_data(data_dir, candle_freq='1min', timestamp_unit=None):
    """Load the order book and derive top-of-book features and candles.

    Parameters:
    - data_dir: Directory passed to ``load_order_book_data``
    - candle_freq: pandas offset alias for the candle width (default one
      minute)
    - timestamp_unit: Optional ``unit`` forwarded to ``pd.to_datetime`` for
      numeric timestamps (e.g. ``'us'``). With the default ``None`` pandas
      applies its own default parsing.
      NOTE(review): if the files store epoch integers in a unit other than
      nanoseconds, the default will produce wrong dates - confirm against
      the data source.

    Returns:
    - (df, candles): ``df`` is the order book indexed by timestamp with
      added ``mid_price``, ``microprice``, ``imbalance`` and ``spread``
      columns; ``candles`` holds close/high/low/open of the mid price per
      ``candle_freq`` bucket.
    """
    df = load_order_book_data(data_dir)

    best_bid = df['bids[0].price']
    best_ask = df['asks[0].price']
    bid_size = df['bids[0].amount']
    ask_size = df['asks[0].amount']
    top_size = bid_size + ask_size

    # Calculate mid price from top of book
    df['mid_price'] = (best_bid + best_ask) / 2

    # Calculate microprice (weighted mid price): each price is weighted by
    # the size resting on the opposite side
    df['microprice'] = (best_bid * ask_size + best_ask * bid_size) / top_size

    # Calculate order book imbalance
    df['imbalance'] = (bid_size - ask_size) / top_size

    # Calculate spread
    df['spread'] = best_ask - best_bid

    # Index by time so the mid price can be resampled into candles
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit=timestamp_unit)
    df.set_index('timestamp', inplace=True)

    # Create candles ('min' replaces the deprecated 'T' minute alias)
    candles = df['mid_price'].resample(candle_freq).agg({
        'close': 'last',
        'high': 'max',
        'low': 'min',
        'open': 'first'
    })

    return df, candles

def _run_market_maker(order_book):
    """Run a fresh market maker over ``order_book`` and return its history."""
    print(f"Running on subset: {len(order_book)} order book updates "
          f"({order_book.index[0]} to {order_book.index[-1]})")

    mm = SafeAlphaAvellanedaStoikov(
        max_inventory=10,
        price_range=0.1,  # 10% price bounds
        max_spread_pct=0.05,  # 5% max spread
        verbose=True
    )

    # Walk the needed columns in lockstep instead of materialising a Series
    # per row with iterrows().
    top_of_book = zip(
        order_book['mid_price'],
        order_book['asks[0].price'],
        order_book['asks[0].amount'],
        order_book['bids[0].price'],
        order_book['bids[0].amount'],
    )
    for mid, ask, ask_qty, bid, bid_qty in top_of_book:
        mm.step(
            mid_price=mid,
            ask_price=ask,
            ask_qty=ask_qty,
            bid_price=bid,
            bid_qty=bid_qty
        )

    return mm.get_performance_metrics()


def run_subset_of_data(data_dir, num_points=100, start_point=0):
    """Simulate the market maker on a positional slice of the order book.

    Parameters:
    - data_dir: Directory holding the order book files
    - num_points: Number of consecutive order book updates to process
    - start_point: Positional index of the first update

    Returns:
    - DataFrame with the market maker's per-step history

    Raises:
    - IndexError: if the requested slice contains no rows
    """
    # Load full data but only process a subset
    full_order_book, _ = load_and_preprocess_data(data_dir)

    order_book = full_order_book.iloc[start_point:start_point + num_points]
    if order_book.empty:
        raise IndexError(
            f"No order book rows in positions "
            f"[{start_point}, {start_point + num_points})"
        )

    return _run_market_maker(order_book)


def run_time_subset(data_dir, start_time, end_time):
    """Simulate the market maker on updates between two timestamps.

    Both bounds are inclusive and are compared against the timestamp index
    produced by ``load_and_preprocess_data``.

    Returns:
    - DataFrame with the market maker's per-step history

    Raises:
    - IndexError: if no update falls inside the window
    """
    full_order_book, _ = load_and_preprocess_data(data_dir)

    # A boolean mask works even when the index is not sorted.
    in_window = (full_order_book.index >= start_time) & \
                (full_order_book.index <= end_time)
    order_book = full_order_book[in_window]
    if order_book.empty:
        raise IndexError(
            f"No order book rows between {start_time} and {end_time}"
        )

    return _run_market_maker(order_book)

        


# Example usage:
if __name__ == "__main__":
    # NOTE(review): machine-specific absolute path - adjust before running
    # elsewhere.
    data_directory = "/Users/diananigmatullina/Downloads/data_thesis"
    
    # Run on the first 100 points (num_points=100)
    results1 = run_subset_of_data(data_directory, num_points=100, start_point=0)
    
    # Run on 10,000 points starting at position 10,000 (points 10,000-19,999)
    results2 = run_subset_of_data(data_directory, num_points=10000, start_point=10000)
    
    # Run on specific time period
    # NOTE(review): requires run_time_subset(data_dir, start_time, end_time)
    # to be in scope when this cell runs.
    start_time = pd.to_datetime("2023-01-01 12:00:00")
    end_time = pd.to_datetime("2023-01-01 13:00:00")
    results3 = run_time_subset(data_directory, start_time, end_time)