# Connector

In [7]:
import requests
import pandas as pd

import os
from dotenv import load_dotenv

load_dotenv()

Alpaca_endpoint = os.getenv("ALPACA_ENDPOINT")
Alpaca_key = os.getenv("ALPACA_KEY")
Alpaca_secret = os.getenv("ALPACA_SECRET")


# connect to alphca api

def connect_to_alphca_api(api_key: str) -> requests.Session:
    session = requests.Session()
    session.headers.update({
        "Authorization": f"Bearer {api_key}"
    })
    return session

# Order Book backtest

In [21]:
import json
from datetime import datetime

# Load the entire JSON file
with open('order_book.json', 'r') as f:
    orderbook_data = json.load(f)


# Access a single
first_snapshot = orderbook_data[0]
print(f"Asset: {first_snapshot['asset']}")
print(f"Time: {first_snapshot['time']}")
print(f"Best Bid: {first_snapshot['data']['bids'][0]['price']}")
print(f"Best Ask: {first_snapshot['data']['asks'][0]['price']}")

for snapshot in orderbook_data[0:10]:
    print(snapshot['time'])


Asset: BTC/USD
Time: 2025-11-21T19:27:30.908267503Z
Best Bid: 82271.97
Best Ask: 84990.3
2025-11-21T19:27:30.908267503Z
2025-11-21T19:27:36.30235157Z
2025-11-21T19:27:36.30238377Z
2025-11-21T19:27:36.30240492Z
2025-11-21T19:27:36.30241237Z
2025-11-21T19:27:36.30242238Z
2025-11-21T19:27:36.30247184Z
2025-11-21T19:27:36.30254818Z
2025-11-21T19:27:36.30256045Z
2025-11-21T19:27:36.30261834Z


In [23]:
import json
import pandas as pd
import numpy as np

with open('order_book.json', 'r') as f:
    orderbook_data = json.load(f)

# Extract time series data
timestamps = []
best_bids = []
best_asks = []
imbalances = []

for snapshot in orderbook_data:
    timestamps.append(snapshot['time'])
    bids = snapshot['data']['bids']
    asks = snapshot['data']['asks']
    
    if bids and asks:
        best_bids.append(bids[-1]['price'])
        best_asks.append(asks[0]['price'])
        
        # Calculate imbalance
        bid_vol = sum(b['size'] for b in bids[:5])
        ask_vol = sum(a['size'] for a in asks[:5])
        total_vol = bid_vol + ask_vol
        imbalance = (bid_vol - ask_vol) / total_vol if total_vol > 0 else 0
        imbalances.append(imbalance)
    else:
        best_bids.append(None)
        best_asks.append(None)
        imbalances.append(None)

# Create DataFrame
df = pd.DataFrame({
    'timestamp': pd.to_datetime(timestamps),
    'best_bid': best_bids,
    'best_ask': best_asks,
    'imbalance': imbalances
})

# Calculate spread
df['spread'] = df['best_ask'] - df['best_bid']
df['mid_price'] = (df['best_bid'] + df['best_ask']) / 2

# Now you can do time-series analysis
print(df.head())
print(f"\nAverage spread: {df['spread'].mean()}")
print(f"Average imbalance: {df['imbalance'].mean()}")

                            timestamp   best_bid   best_ask  imbalance  \
0 2025-11-21 19:27:30.908267503+00:00  84825.400  84990.300   0.091315   
1 2025-11-21 19:27:36.302351570+00:00  84802.167  84990.300   0.091315   
2 2025-11-21 19:27:36.302383770+00:00  84845.360  84990.300   0.091315   
3 2025-11-21 19:27:36.302404920+00:00  84845.360  85017.205  -0.053400   
4 2025-11-21 19:27:36.302412370+00:00  84845.360  84972.300   0.091653   

    spread   mid_price  
0  164.900  84907.8500  
1  188.133  84896.2335  
2  144.940  84917.8300  
3  171.845  84931.2825  
4  126.940  84908.8300  

Average spread: 158.9049069767473
Average imbalance: 0.05775294927866587
