In [3]:
import numpy as np


In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

price_data = pd.read_csv('round-1-island-data-bottle/prices_round_1_day_0.csv', delimiter=';', parse_dates=['timestamp'])
price_data = price_data.drop(columns=['day'])
price_data = price_data[price_data['product'] == 'STARFRUIT']
#add prev bid and ask volume for easy calculation of order flow
price_data['prev_bid_vol'] = price_data['bid_volume_1'].shift(1)
price_data['prev_ask_vol'] = price_data['ask_volume_1'].shift(1)

#add rip indicator
price_data['bt_rip_indicator'] = price_data['mid_price'].pct_change(periods=5) >= 0.05
price_data['bt_rip_indicator'] = price_data['bt_rip_indicator'].astype(int)
#add x_vol 
price_data['bt_x_vol'] = price_data['mid_price'].rolling(window=10).std()
#add vol_ratio
price_data['bt_vol_ratio'] = price_data['mid_price'].rolling(window=10).std().rolling(window=10).apply(lambda x: (max(x) - min(x)) / np.mean(x))

price_data.columns = [col.strip() for col in price_data.columns]  #delete any extra spaces


# print(price_data.head())


  price_data = pd.read_csv('round-1-island-data-bottle/prices_round_1_day_0.csv', delimiter=';', parse_dates=['timestamp'])


In [33]:
global returns

returns = [0]

def bt_spread(row):
    best_bid = row['bid_price_1']
    best_ask = row['ask_price_1']
    mid_price = (best_ask + best_bid) / 2
    spread = (best_ask - best_bid) / mid_price
    return spread

def bt_orderbook_imbalance(row):
    max_ask_depth = row['ask_volume_1']
    max_bid_depth = row['bid_volume_1']
    total_depth = max_ask_depth + max_bid_depth

    normalized_imbalance = (max_ask_depth - max_bid_depth) / total_depth if total_depth != 0 else 0
    

    return normalized_imbalance

def bt_orderbook_imbalance_ratio(row):
    max_ask_depth = row['ask_volume_1']
    max_bid_depth = row['bid_volume_1']

    max_ratio = max_ask_depth / max_bid_depth if max_bid_depth != 0 else float('inf')

    return max_ratio

def bt_bid_order_flow(row):
    bid_flow = row['bid_volume_1'] - row['prev_bid_vol']
    return bid_flow

def bt_ask_order_flow(row):
    ask_flow = row['ask_volume_1'] - row['prev_ask_vol']
    return ask_flow

def bt_order_flow_imbalance(row):
    bid_flow, ask_flow = bt_bid_order_flow(row), bt_ask_order_flow(row)
    return bid_flow - ask_flow
# def bt_rip_indicator(df, x, y):
#     returns = df['price'].pct_change(periods=x)  # calculate percent change over x periods
#     indicator = (returns >= y).astype(int)
#     return indicator

# def bt_x_vol(df, x):
#     return df['price'].rolling(window=x).std()

# def bt_vol_ratio(df, x):
#     rolling_std = df['price'].rolling(window=x).std()
#     max_vol = rolling_std.max()
#     min_vol = rolling_std.min()
#     avg_vol = rolling_std.mean()
#     return (max_vol - min_vol) / avg_vol if avg_vol != 0 else float('inf')




In [49]:
price_data['bt_spread'] = price_data.apply(bt_spread, axis=1)
price_data['bt_orderbook_imbalance'] = price_data.apply(bt_orderbook_imbalance, axis=1)
price_data['bt_orderbook_imbalance_ratio'] = price_data.apply(bt_orderbook_imbalance_ratio, axis=1)
price_data['bt_bid_flow'] = price_data.apply(bt_bid_order_flow, axis=1)
price_data['bt_ask_flow'] = price_data.apply(bt_ask_order_flow, axis=1)
price_data['bt_order_flow_imbalance'] = price_data.apply(bt_order_flow_imbalance, axis=1)

#add returns:
price_data['future_price'] = price_data['mid_price'].shift(-3)
price_data['returns'] = (price_data['future_price'] - price_data['mid_price']) / price_data['mid_price']
price_data.dropna(subset=['returns'], inplace=True)
#replace any nan values with 0 in the features
# price_data.fillna(0, inplace=True)

In [50]:
price_data.head()

Unnamed: 0,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,...,bt_vol_ratio,bt_spread,bt_orderbook_imbalance,bt_orderbook_imbalance_ratio,bt_bid_flow,bt_ask_flow,bt_order_flow_imbalance,future_price,returns_3_ticks,returns
1,0,STARFRUIT,5036,30,,,,,5043,30,...,0.560561,0.001389,0.0,1.0,,,,5040.5,0.000198,0.000198
3,100,STARFRUIT,5041,4,5037.0,1.0,5036.0,30.0,5043,31,...,0.560561,0.000397,0.771429,7.75,-26.0,1.0,-27.0,5039.5,-0.000496,-0.000496
5,200,STARFRUIT,5037,1,5036.0,21.0,,,5043,1,...,0.560561,0.00119,0.0,1.0,-3.0,-30.0,27.0,5040.5,9.9e-05,9.9e-05
6,300,STARFRUIT,5037,25,,,,,5044,25,...,0.560561,0.001389,0.0,1.0,24.0,24.0,0.0,5040.5,0.0,0.0
9,400,STARFRUIT,5037,25,,,,,5042,6,...,0.560561,0.000992,-0.612903,0.24,0.0,-19.0,19.0,5040.0,9.9e-05,9.9e-05


In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer


imputer = SimpleImputer(strategy='mean')
price_data[['bt_rip_indicator', 'bt_x_vol', 'bt_vol_ratio', 'bt_spread', 'bt_orderbook_imbalance', 'bt_orderbook_imbalance_ratio', 'bt_bid_flow', 'bt_ask_flow', 'bt_order_flow_imbalance']] = imputer.fit_transform(price_data[['bt_rip_indicator', 'bt_x_vol', 'bt_vol_ratio', 'bt_spread', 'bt_orderbook_imbalance', 'bt_orderbook_imbalance_ratio', 'bt_bid_flow', 'bt_ask_flow', 'bt_order_flow_imbalance']])
features = ['bt_rip_indicator', 'bt_x_vol', 'bt_vol_ratio', 'bt_spread', 'bt_orderbook_imbalance', 'bt_orderbook_imbalance_ratio', 'bt_bid_flow', 'bt_ask_flow', 'bt_order_flow_imbalance']
X = price_data[features]
y = price_data['returns']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model = LinearRegression()
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('R^2 Score:', model.score(X_test, y_test)) 


Mean Squared Error: 1.0172561112379647e-07
R^2 Score: 0.20680110359045245


In [54]:
coefficients = model.coef_ 
intercept = model.intercept_ 

features = ['bt_rip_indicator', 'bt_x_vol', 'bt_vol_ratio', 'bt_spread', 'bt_orderbook_imbalance', 'bt_orderbook_imbalance_ratio', 'bt_bid_flow', 'bt_ask_flow', 'bt_order_flow_imbalance']
for feature, coef in zip(features, coefficients):
    print(f"{feature}: {coef}")
print("intercept:", intercept)

bt_rip_indicator: 0.0
bt_x_vol: 1.2393382185941624e-05
bt_vol_ratio: 1.7510197185065842e-05
bt_spread: 0.010746712418221607
bt_orderbook_imbalance: -0.00017974085965982107
bt_orderbook_imbalance_ratio: -4.361872758752063e-06
bt_bid_flow: -78381908.27485323
bt_ask_flow: 78381908.27485229
bt_order_flow_imbalance: 78381908.27485344
intercept: -1.2519778872005928e-05


In [None]:
##  features for actual trading:

# Measuring spread: BEST_ASK-BEST_BID,  (BEST_ASK-BEST_BID)/MID_PRICE 
def spread_ (order_depth):
    best_bid = max(order_depth.buy_orders.keys(), default=0)
    best_ask = min(order_depth.sell_orders.keys(), default=0)
    mid_price = (best_ask + best_bid) / 2
    spread = (best_ask - best_bid) / mid_price
    return spread, mid_price


# Orderbook imbalance: MAX ASK DEPTH - MAX BID DEPTH, or normalized version:  (MAX_ASK_DEPTH - MAX_BID_DEPTH)/TOTAL_ORDERBOOK_DEPTH  and MAX_ASK_DEPTH/MAX_BID_DEPTH. 
def orderbook_imbalance (order_depth):
    max_ask_depth = order_depth.sell_orders.values().size()
    max_bid_depth = order_depth.buy_orders.values().size()
    total_depth = abs(max_ask_depth) + abs(max_bid_depth)

    if total_depth == 0:
        normalized_imbalance = 0
    else:
        normalized_imbalance = (max_ask_depth - max_bid_depth) / total_depth

    max_ratio = max_ask_depth / max_bid_depth if max_bid_depth != 0 else float('inf')

    return normalized_imbalance, max_ratio

# RIP_INDICATOR: 1 if return on price in last x timestamps is >=y%, and 0 otherwise. (need to pick x and y). 
def rip_indicator (trades, x, y):
    if len(trades) < x:
        return 0 
    returns = trades.pct_change(periods=5)
    
    return 1 if returns >= y else 0

# X_VOL: (normalized) volatility (aka standard deviation) of stock in last x timestamps (need to pick x). 
def x_vol (trades, x):
    if len(trades) < x:
        return 0
    latest_trades = trades[-x:]
    return np.std(latest_trades)

# VOL_RATIO: (MAX_VOL_LAST_x_TIMESTAMPS - MIN_VOL_LAST_x_TIMESTAMPS)/AVG_VOL_LAST_x_TIMESTAMPS, (need to pick/refine x). 
def vol_ratio (trades, x):
    if len(trades) < x:
        return 0 
    latest_trades = trades[-x:]
    volatilities = [np.std(latest_trades[max(0, i-10):i+1]) for i in range(len(latest_trades))]

    max_vol = max(volatilities)
    min_vol = min(volatilities)
    avg_vol = np.mean(volatilities)

    return (max_vol - min_vol) / avg_vol if avg_vol != 0 else float('inf')


# BID_ORDER_FLOW = new volume (net/positive) at best bid price. For example, if best bid price volume changes from 200 to 100, the alpha is -100.  Same for ASK_ORDER_FLOW. 
# THIS IS DONE IN THE TRADING STRATEGY FUNCTION


# def bid_ask_order_flow (order_depth, previous_order_depth):
#     best_bid = max(order_depth.buy_orders.keys(), default=0)
#     previous_best_bid = max(previous_order_depth.buy_orders.keys(), default=0)
    
#     bid_flow = order_depth.buy_orders.get(best_bid, 0) - previous_order_depth.buy_orders.get(previous_best_bid, 0)
    
#     best_ask = min(order_depth.sell_orders.keys(), default=0)
#     previous_best_ask = min(previous_order_depth.sell_orders.keys(), default=0)
    
#     ask_flow = order_depth.sell_orders.get(best_ask, 0) - previous_order_depth.sell_orders.get(previous_best_ask, 0)

#     return bid_flow, ask_flow


# ORDER_FLOW_IMBALANCE: BID_ORDER_FLOW - ASK_ORDER_FLOW
# THIS IS DONE IN TRADING STRATEGY FUNCTION
# def order_flow_imbalance (order_depth, previous_order_depth):
#     bid_flow, ask_flow = bid_ask_order_flow(order_depth, previous_order_depth)
#     return bid_flow - ask_flow

def predict_returns(bt_rip_indicator, bt_x_vol, bt_vol_ratio, bt_spread, 
                    bt_orderbook_imbalance, bt_orderbook_imbalance_ratio, 
                    bt_bid_flow, bt_ask_flow, bt_order_flow_imbalance):
    
    coef_bt_rip_indicator = 0.0
    coef_bt_x_vol = 1.2393382185941624e-05
    coef_bt_vol_ratio = 1.7510197185065842e-05
    coef_bt_spread = 0.010746712418221607
    coef_bt_orderbook_imbalance = -0.00017974085965982107
    coef_bt_orderbook_imbalance_ratio = -4.361872758752063e-06
    coef_bt_bid_flow = -78381908.27485323
    coef_bt_ask_flow = 78381908.27485229
    coef_bt_order_flow_imbalance = 78381908.27485344

    
    intercept = -1.2519778872005928e-05

    prediction = (coef_bt_rip_indicator * bt_rip_indicator +
                  coef_bt_x_vol * bt_x_vol +
                  coef_bt_vol_ratio * bt_vol_ratio +
                  coef_bt_spread * bt_spread +
                  coef_bt_orderbook_imbalance * bt_orderbook_imbalance +
                  coef_bt_orderbook_imbalance_ratio * bt_orderbook_imbalance_ratio +
                  coef_bt_bid_flow * bt_bid_flow +
                  coef_bt_ask_flow * bt_ask_flow +
                  coef_bt_order_flow_imbalance * bt_order_flow_imbalance +
                  intercept)

    return prediction



def run(self, state: TradingState) -> tuple[dict[Symbol, list[Order]], int, str]:

    result = {'AMETHYSTS': [], 'STARFRUIT': []}
    conversions = 0
    trader_data = ""

    # TODO: Add logic
    prev_bid_ask_vol = []
    prev_mid_price = []
    amethystOrders = self.amethysts(state.order_depths, state.position)
    starfruitOrders = self.starfruit(state.order_depths, state.position, prev_bid_ask_vol, prev_mid_price)
    result['AMETHYSTS'] = amethystOrders


    logger.flush(state, result, conversions, trader_data)
    return result, conversions, trader_data