In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
from typing import List, Dict, Tuple
from collections import defaultdict


In [12]:
# --------------
# CONFIGURATION
# --------------
# NOTE(review): `p` is not referenced in any of the visible cells — looks like a leftover; confirm before removing.
p = ''
# Per-day input files; the `{day}` placeholder is filled in with str.format by load_and_merge.
PRICES_PATH = 'round-5-island-data-bottle/prices_round_5_day_{day}.csv'
TRADES_PATH = 'round-5-island-data-bottle/trades_round_5_day_{day}.csv'
# Days that the main analysis cell loads and concatenates.
DAYS = [2, 3, 4]
LEAD_LAG_DT = 500  # offset handed to .shift() in compute_informed_scores: it counts ROWS of the trade frame, not timestamp units

# --------------
# 1. LOAD AND MERGE DATA
# --------------
def load_and_merge(day: int) -> pd.DataFrame:
    """Load one day's price and trade CSVs and attach the mid price to every trade.

    Both files are ';'-separated and located through PRICES_PATH / TRADES_PATH.
    In the price file `product` is renamed to `symbol`; in the trade file
    `price` is renamed to `trade_price` and a `day` column holding ``day`` is
    set. The result is a left join of trades onto prices on
    (day, timestamp, symbol), so every trade row is kept and `mid_price` is
    NaN where no price row matches.
    """
    join_keys = ['day', 'timestamp', 'symbol']

    price_rows = (
        pd.read_csv(PRICES_PATH.format(day=day), sep=';')
        .rename(columns={'product': 'symbol'})
    )
    trade_rows = (
        pd.read_csv(TRADES_PATH.format(day=day), sep=';')
        .assign(day=day)
        .rename(columns={'price': 'trade_price'})
    )

    # Only the join keys and the mid price are taken from the price file.
    mid_lookup = price_rows[join_keys + ['mid_price']]
    return trade_rows.merge(mid_lookup, on=join_keys, how='left')

In [13]:
def compute_informed_scores(df: pd.DataFrame, lead_lag: "int | None" = None) -> pd.DataFrame:
    """Score counterparties by the mid-price move around their trades.

    For every trade the mid price ``lead_lag`` rows earlier and ``lead_lag``
    rows later (within the same series of trades) is looked up and
    ``(mid_after - mid_before) / mid_before`` is computed. Each trade then
    contributes that return once to its buyer (side ``'buy'``) and once to its
    seller (side ``'sell'``), and returns are averaged per
    (counterparty, symbol, side).

    Parameters
    ----------
    df : pd.DataFrame
        Trade-level frame with columns ``symbol``, ``timestamp``,
        ``mid_price``, ``buyer`` and ``seller``; ``day`` is used when present.
    lead_lag : int, optional
        Number of rows to look back / ahead. ``None`` (the default) falls back
        to the module-level ``LEAD_LAG_DT``.

    Returns
    -------
    pd.DataFrame
        Columns ``counterparty``, ``symbol``, ``side``, ``return``.

    Notes
    -----
    * The offset counts rows of ``df`` (i.e. trades), not timestamp units.
    * When ``df`` has a ``day`` column, rows are ordered by
      (symbol, day, timestamp) and the shift is applied per (day, symbol), so
      a lead/lag value is never taken from a different day. Without ``day``
      the series is per symbol only.
    * Trades whose ``mid_before`` is 0 are skipped; dividing by it would give
      ``inf``, which ``dropna`` does not remove and which would swamp the mean.
    * The caller's frame is not modified.
    """
    if lead_lag is None:
        lead_lag = LEAD_LAG_DT

    # 1) sort + lead/lag mid_price, keeping each day's series separate
    if 'day' in df.columns:
        series_keys = ['day', 'symbol']
        sort_cols = ['symbol', 'day', 'timestamp']
    else:
        series_keys = ['symbol']
        sort_cols = ['symbol', 'timestamp']

    ordered = df.sort_values(sort_cols)  # new frame; safe to add columns
    mid_before = ordered.groupby(series_keys)['mid_price'].shift(lead_lag)
    mid_after = ordered.groupby(series_keys)['mid_price'].shift(-lead_lag)

    ordered['mid_before'] = mid_before
    ordered['mid_after'] = mid_after
    # A zero denominator becomes NaN so the row is dropped just below.
    ordered['return'] = (mid_after - mid_before) / mid_before.where(mid_before != 0)

    # drop any rows where we can't compute a return
    valid = ordered.dropna(subset=['return'])

    # 2) build one row per (counterparty, side)
    legs = [
        valid[['symbol', 'return', role]]
        .rename(columns={role: 'counterparty'})
        .assign(side=side)
        for role, side in (('buyer', 'buy'), ('seller', 'sell'))
    ]
    long_df = pd.concat(legs, ignore_index=True)

    # 3) average return per counterparty/symbol/side
    return (
        long_df
        .groupby(['counterparty', 'symbol', 'side'])['return']
        .mean()
        .reset_index()
    )

In [18]:
# --------------
# 3. LIQUIDITY & SLIPPAGE METRICS
# --------------
def compute_liquidity_slippage(df: pd.DataFrame) -> pd.DataFrame:
    """Average trade size and signed slippage per buyer.

    Expects the columns 'mid_price', 'trade_price', 'quantity' and 'buyer'.
    Slippage of a trade is ``(trade_price - mid_price) * sign(quantity)``, so
    for a positive quantity a positive value means the trade printed above mid.

    Returns one row per buyer with columns 'counterparty' (the buyer),
    'avg_size' (mean quantity) and 'avg_slippage' (mean slippage). The input
    frame is left untouched.
    """
    direction = np.sign(df['quantity'])
    enriched = df.assign(
        mid_when_traded=df['mid_price'],
        slippage=(df['trade_price'] - df['mid_price']) * direction,
    )

    per_buyer = enriched.groupby('buyer').agg(
        avg_size=('quantity', 'mean'),
        avg_slippage=('slippage', 'mean'),
    )
    # The group key becomes the 'counterparty' column of the result.
    return per_buyer.rename_axis('counterparty').reset_index()


In [20]:
# --------------
# 4. BUILD TRADE FLOW NETWORK
# --------------
def build_trade_graph(df: pd.DataFrame) -> nx.DiGraph:
    """Build a directed buyer → seller graph weighted by traded volume.

    Reads the columns "buyer", "seller" and "quantity" of ``df``.

    * Nodes are the buyer / seller names. Each node carries ``total_volume``,
      the sum of ``|quantity|`` over every trade the party took part in (a
      trade whose buyer and seller are the same name is counted twice).
    * Each (buyer, seller) pair gets one edge whose ``weight`` is the summed
      ``|quantity|`` of their trades.
    """
    G = nx.DiGraph()
    # Iterate the three columns directly instead of DataFrame.iterrows():
    # iterrows builds a Series per row (slow) and coerces the row to a single
    # dtype, whereas iterating a Series yields plain Python scalars.
    volumes = df["quantity"].abs()
    for buyer, seller, vol in zip(df["buyer"], df["seller"], volumes):
        for party in (buyer, seller):
            if party not in G:
                G.add_node(party, total_volume=0)
            G.nodes[party]["total_volume"] += vol
        if G.has_edge(buyer, seller):
            G[buyer][seller]["weight"] += vol
        else:
            G.add_edge(buyer, seller, weight=vol)
    return G


In [7]:
# --------------
# 5. COUNTERPARTY IMBALANCE SIGNAL
# --------------
def compute_imbalance_signal(df: pd.DataFrame) -> pd.DataFrame:
    """Sum score-weighted trade direction per (timestamp, symbol).

    Each row contributes ``sign(quantity) * informed_score``; the contributions
    are summed over all rows sharing the same 'timestamp' and 'symbol' (no
    other column takes part in the grouping). Returns the columns
    'timestamp', 'symbol' and 'weighted_side'. The input frame is not modified.
    """
    direction = np.sign(df['quantity'])
    weighted = df.assign(
        sign=direction,
        weighted_side=direction * df['informed_score'],  # informed_score comes from step 2
    )
    per_tick = weighted.groupby(['timestamp', 'symbol'])['weighted_side'].sum()
    return per_tick.reset_index()


In [22]:
def compute_fill_distribution(df: pd.DataFrame) -> pd.DataFrame:
    """Share of each symbol's total quantity attributed to each counterparty.

    Expects the columns 'symbol', 'counterparty' and 'quantity'. Returns one
    row per (symbol, counterparty) with

    * 'filled_qty' – summed quantity of that pair,
    * 'total_qty'  – summed 'filled_qty' of the whole symbol,
    * 'share'      – filled_qty / total_qty.
    """
    fills = (
        df
        .groupby(['symbol', 'counterparty'])['quantity']
        .sum()
        .reset_index(name='filled_qty')
    )
    # Broadcast the per-symbol total back onto every row of that symbol.
    symbol_total = fills.groupby('symbol')['filled_qty'].transform('sum')
    return fills.assign(
        total_qty=symbol_total,
        share=fills['filled_qty'] / symbol_total,
    )


In [23]:
def compute_time_of_day_activity(df: pd.DataFrame) -> pd.DataFrame:
    """Count trades per (counterparty, symbol, hour bucket).

    The bucket is ``(timestamp // 100000) % 24``, stored in an 'hour' column on
    a copy of the input. Returns the columns 'counterparty', 'symbol', 'hour'
    and 'num_trades' (number of rows in each group).
    """
    # NOTE(review): "hour" is a bucket of 100000 timestamp units — whether that
    # corresponds to a wall-clock hour is not visible here; confirm.
    bucketed = df.assign(hour=(df["timestamp"] // 100000) % 24)
    trade_counts = bucketed.groupby(["counterparty", "symbol", "hour"]).size()
    return trade_counts.reset_index(name="num_trades")


In [25]:
# -- --------------
#  MAIN ANALYSIS
# -- --------------
if __name__ == "__main__":
    # Driver for the whole analysis: load every day in DAYS, compute each
    # metric and write one output file per metric into OUT_DIR.
    import os  # only this driver needs it, to create the output directory

    OUT_DIR = "round-5-observations"
    # DataFrame.to_csv and nx.write_gexf open the target path directly and do
    # not create missing parent directories, so create it before any write.
    os.makedirs(OUT_DIR, exist_ok=True)

    # 1) load & merge
    all_days: List[pd.DataFrame] = [load_and_merge(d) for d in DAYS]
    df_all = pd.concat(all_days, ignore_index=True)

    #  — who's the "counterparty" we group by? use the buyer field:
    df_all['counterparty'] = df_all['buyer']

    #  — define trade side for informed score merging
    df_all['side'] = np.where(df_all['quantity'] > 0, 'buy', 'sell')

    print(df_all.shape)

    print(df_all.head())

    # 2) informed scores
    scores = compute_informed_scores(df_all)
    scores.to_csv(f"{OUT_DIR}/informed_scores.csv", index=False)

    # 3) liquidity & slippage (grouped by buyer as counterparty)
    liq = compute_liquidity_slippage(df_all)
    liq.to_csv(f"{OUT_DIR}/liquidity_slippage.csv", index=False)

    # 4) trade‑flow network
    G = build_trade_graph(df_all)
    nx.write_gexf(G, f"{OUT_DIR}/trade_flow_network.gexf")

    # 5) imbalance signal (merge in the informed score)
    # Left join: trades without a matching (counterparty, symbol, side) score
    # keep a NaN informed_score.
    df_all = df_all.merge(
        scores[["counterparty", "symbol", "side", "return"]],
        on=["counterparty", "symbol", "side"],
        how="left"
    ).rename(columns={"return": "informed_score"})
    imbalance = compute_imbalance_signal(df_all)
    imbalance.to_csv(f"{OUT_DIR}/imbalance_signal.csv", index=False)

    # 6) fill distribution
    fill = compute_fill_distribution(df_all)
    fill.to_csv(f"{OUT_DIR}/fill_distribution.csv", index=False)

    # 7) temporal patterns
    tod = compute_time_of_day_activity(df_all)
    tod.to_csv(f"{OUT_DIR}/counterparty_time_activity.csv", index=False)

    print("✅ Analysis complete, CSV/GEXF outputs generated.")

(53477, 11)
   timestamp    buyer   seller            symbol   currency  trade_price  \
0          0   Caesar    Paris        CROISSANTS  SEASHELLS       4265.0   
1        100  Charlie    Paris  RAINFOREST_RESIN  SEASHELLS       9999.0   
2        200    Paris   Caesar        CROISSANTS  SEASHELLS       4266.0   
3        200    Paris   Caesar              JAMS  SEASHELLS       6520.0   
4        200    Paris  Charlie              KELP  SEASHELLS       2046.0   

   quantity  day  mid_price counterparty side  
0         8    2     4265.5       Caesar  buy  
1         1    2     9995.5      Charlie  buy  
2         8    2     4265.5        Paris  buy  
3         7    2     6519.5        Paris  buy  
4         1    2     2046.5        Paris  buy  
✅ Analysis complete, CSV/GEXF outputs generated.


In [30]:
import pandas as pd
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go

# ——————————————————————————————
# 1) Price & Trade Overview (rebuild df_all)
# ——————————————————————————————
# NOTE(review): these three assignments repeat the values already set in the
# configuration cell earlier in the notebook; keep both copies in sync or drop
# this one once the notebook is always run top to bottom.
PRICES_PATH = 'round-5-island-data-bottle/prices_round_5_day_{day}.csv'
TRADES_PATH = 'round-5-island-data-bottle/trades_round_5_day_{day}.csv'
DAYS = [2,3,4]

def load_and_merge(day: int) -> pd.DataFrame:
    """Return one day's trades with the matching mid price joined on.

    Reads the ';'-separated price and trade files for ``day`` (paths come from
    PRICES_PATH / TRADES_PATH), renames `product` → `symbol` in prices and
    `price` → `trade_price` in trades, stamps the trades with a `day` column
    and left-joins `mid_price` on (day, timestamp, symbol).
    """
    join_keys = ['day', 'timestamp', 'symbol']

    price_frame = pd.read_csv(PRICES_PATH.format(day=day), sep=';')
    trade_frame = pd.read_csv(TRADES_PATH.format(day=day), sep=';')

    trade_frame['day'] = day
    price_frame = price_frame.rename(columns={'product': 'symbol'})
    trade_frame = trade_frame.rename(columns={'price': 'trade_price'})

    # Left join keeps every trade; mid_price is NaN when no price row matches.
    return pd.merge(
        trade_frame,
        price_frame[join_keys + ['mid_price']],
        on=join_keys,
        how='left',
    )

# build full df_all
# Rebuild the trade-level frame for all days (one load_and_merge call per day).
all_days = [load_and_merge(d) for d in DAYS]
df_all = pd.concat(all_days, ignore_index=True)
# for coloring we want symbol, for sizing the markers we use quantity
df_all['quantity'] = df_all['quantity'].astype(float)

# df_all stacks several days on the same `timestamp` axis (timestamp is only a
# merge key together with `day`, so it presumably restarts every day).
# line_group="day" draws one line per (symbol, day); without it each symbol's
# line would run back to the start of the axis at every day boundary.
fig1 = px.line(
    df_all,
    x="timestamp", y="mid_price", color="symbol",
    line_group="day",
    title="Mid‑Price Time Series by Product",
    labels={"mid_price":"Mid Price","timestamp":"Timestamp","symbol":"Product"}
)
# Overlay every trade as a translucent red marker whose size grows with the
# square root of the absolute traded quantity.
fig1.add_trace(
    go.Scatter(
        x = df_all["timestamp"],
        y = df_all["mid_price"],
        mode="markers",
        marker=dict(
            size=(df_all["quantity"].abs())**0.5,
            color="rgba(255,0,0,0.4)"
        ),
        name="Trade Size"
    )
)
fig1.show()


# ——————————————————————————————
# 2) Informed‑Trader Scores
# ——————————————————————————————
# Scores written by the main analysis cell: one row per
# (counterparty, symbol, side) with the mean return in `return`.
df_scores = pd.read_csv("round-5-observations/informed_scores.csv")
# The file holds a 'buy' and a 'sell' row for the same counterparty/symbol.
# Facet on `side` so the two bars are shown next to each other instead of
# sharing one slot in the grouped bar chart.
fig2 = px.bar(
    df_scores,
    x="counterparty", y="return", color="symbol",
    facet_col="side",
    barmode="group",
    title="Informed‑Trader Scores by Counterparty & Product",
    labels={"return":"Informed Score","counterparty":"Counterparty"}
)
fig2.update_layout(xaxis_tickangle=-45)
fig2.show()


# ——————————————————————————————
# 3) Liquidity & Slippage Metrics
# ——————————————————————————————
# Per-counterparty averages (avg_size, avg_slippage) read back from disk.
df_liq = pd.read_csv("round-5-observations/liquidity_slippage.csv")
# One bubble per counterparty; bubble area follows the average trade size.
fig3 = px.scatter(
    data_frame=df_liq,
    x="avg_size",
    y="avg_slippage",
    size="avg_size",
    color="counterparty",
    labels=dict(avg_size="Avg. Trade Size", avg_slippage="Avg. Slippage"),
    title="Average Trade‑Size vs. Slippage by Counterparty",
)
fig3.show()


# ——————————————————————————————
# 4) Trade‑Flow Network
# ——————————————————————————————
G = nx.read_gexf("round-5-observations/trade_flow_network.gexf")
# spring_layout starts from random positions; a fixed seed keeps the figure
# identical across kernel restarts and re-runs.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

# All edges go into a single line trace; the None after each pair of points
# separates one edge segment from the next.
edge_x, edge_y = [], []
for u, v in G.edges():
    x0, y0 = pos[u]
    x1, y1 = pos[v]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    mode="lines",
    line=dict(color="#888", width=1),
    hoverinfo="none"
)

# Node coordinates plus a hover label showing the stored total_volume
# (0 when the attribute is missing).
node_x, node_y, node_text = [], [], []
for n in G.nodes():
    x, y = pos[n]
    node_x.append(x)
    node_y.append(y)
    vol = G.nodes[n].get("total_volume", 0)
    node_text.append(f"{n}<br>Total Volume: {vol}")

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode="markers+text",
    marker=dict(
        # marker size = total_volume ** 0.3, but never smaller than 5
        size=[max(5, G.nodes[n].get("total_volume",0)**0.3) for n in G.nodes()],
        color="skyblue", line_width=1
    ),
    text=list(G.nodes()),
    textposition="bottom center",
    hovertext=node_text,
    hoverinfo="text"
)

fig4 = go.Figure([edge_trace, node_trace])
fig4.update_layout(
    title="Trade‑Flow Network (Counterparty as Nodes)",
    showlegend=False,
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
)
fig4.show()


# ——————————————————————————————
# 5) Counterparty Imbalance Signal (plotted per symbol, not per counterparty)
# ——————————————————————————————
# One row per (timestamp, symbol) with the summed `weighted_side`.
df_imb = pd.read_csv("round-5-observations/imbalance_signal.csv")

fig5 = px.line(
    data_frame=df_imb,
    x="timestamp", y="weighted_side",
    facet_col="symbol", facet_col_wrap=2,  # one panel per product, two per row
    labels=dict(
        weighted_side="Imbalance Signal",
        timestamp="Timestamp",
        symbol="Product",
    ),
    title="Imbalance Signal Over Time by Product",
)
fig5.update_layout(showlegend=False)
# Shorten each facet title to the text after its last "=".
fig5.for_each_annotation(lambda ann: ann.update(text=ann.text.rpartition("=")[2]))
fig5.show()


# ——————————————————————————————
# 6) Fill‑Distribution Heatmap
# ——————————————————————————————
# Per (symbol, counterparty) fill shares read back from disk.
df_fill = pd.read_csv("round-5-observations/fill_distribution.csv")
# Counterparty × symbol matrix of summed shares; missing pairs become 0.
pivot = pd.pivot_table(
    df_fill,
    values="share",
    index="counterparty",
    columns="symbol",
    aggfunc="sum",
    fill_value=0,
)
# Transposed for display: products on the y axis, counterparties on the x axis.
fig6 = px.imshow(
    pivot.transpose(),
    title="Fill‑Share Distribution by Counterparty & Product",
    labels={"x": "Counterparty", "y": "Product", "color": "Fill Share"},
)
fig6.show()


# ——————————————————————————————
# 7) Temporal Patterns of Activity
# ——————————————————————————————
# Trade counts per (counterparty, symbol, hour) read back from disk.
df_temp = pd.read_csv("round-5-observations/counterparty_time_activity.csv")
# One panel per product (two per row), one line per counterparty.
fig7 = px.line(
    data_frame=df_temp,
    x="hour",
    y="num_trades",
    color="counterparty",
    facet_col="symbol",
    facet_col_wrap=2,
    labels=dict(num_trades="# Trades", hour="Hour of Day"),
    title="Hourly Trade Counts by Counterparty & Product",
)
fig7.update_layout(legend={"title": "Counterparty"})
fig7.show()
