In [55]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
import numpy as np
import matplotlib.pyplot as plt

In [56]:
def check_stationarity(series):
    result = adfuller(series)
    return result[1] > 0.05  # p-value > 0.05 implies non-stationary

# Calculate mid prices for each product
def calculate_mid_prices(df):
    # Assuming the highest bid and lowest ask are the best prices
    best_bid = df[['bid_price_1', 'bid_price_2', 'bid_price_3']].max(axis=1)
    best_ask = df[['ask_price_1', 'ask_price_2', 'ask_price_3']].min(axis=1)
    
    # Calculate mid price
    mid_price = (best_bid + best_ask) / 2
    return mid_price

In [57]:
curr_round = 1
root_path = f'Round {curr_round}'

# Extract into dataframe
data = pd.read_csv(f'{root_path}/prices_round_{curr_round}_day_0.csv', delimiter=';')

Test if a product's price is non stationary with the Augmented Dickey-Fuller (ADF) test

In [58]:
# Filter data for each stock
stock1 = data[data['product'] == 'KELP'].copy()
stock2 = data[data['product'] == 'SQUID_INK'].copy()

# Reset indices to ensure they're aligned
stock1.reset_index(drop=True, inplace=True)
stock2.reset_index(drop=True, inplace=True)

# Calculate mid prices
stock1['mid_price'] = calculate_mid_prices(stock1)
stock2['mid_price'] = calculate_mid_prices(stock2)

stock1_mid_prices = stock1['mid_price']
stock2_mid_prices = stock2['mid_price']

# Check stationarity of mid prices
print(f"\nKELP mid price is non-stationary: {check_stationarity(stock1_mid_prices.dropna())}")
print(f"SQUID_INK mid price is non-stationary: {check_stationarity(stock2_mid_prices.dropna())}")


KELP mid price is non-stationary: True
SQUID_INK mid price is non-stationary: True


In [59]:
# Regress StockA on StockB
from statsmodels.api import OLS
X = sm.add_constant(stock2_mid_prices)

model = OLS(stock1_mid_prices, X).fit()
spread = model.resid

# Test spread for stationarity
adf_result = adfuller(spread)
p_value = adf_result[1]
print(f"Spread ADF p-value: {p_value:.4f}")

if p_value < 0.05:
    print("Stocks are cointegrated (spread is stationary)")
else:
    print("No cointegration")


Spread ADF p-value: 0.0701
No cointegration


In [49]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Select stocks (e.g., 3 stocks)
stocks = 
result = coint_johansen(stocks, det_order=0, k_ar_diff=1)

# Check trace statistics
print("Trace statistics:", result.lr1)
print("Critical values:", result.cvt)


SyntaxError: invalid syntax (1991242321.py, line 4)