In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
from shared.poly_utils import get_markets, PLATFORM_WALLETS

# Polars table rendering for this notebook: up to 25 rows, all columns (-1),
# and a 1000-character table width so wide frames are not truncated.
# The final return value is bound to `cfg`, presumably so the cell does not echo it.
cfg = (
    pl.Config
    .set_tbl_rows(25)
    .set_tbl_cols(-1)
    .set_tbl_width_chars(1000)
)


In [None]:
# Call the shared get_markets() helper (imported from shared.poly_utils) and keep
# its result as markets_df.
# NOTE(review): markets_df is not referenced by the cells visible here — presumably
# it is used further down the notebook; confirm before removing.
markets_df = get_markets()

In [None]:
# Wallet addresses keyed by a short handle; the handle is only used to look up
# the address that gets assigned to the analysis target.
USERS = dict(
    [
        ("domah", "0x9d84ce0306f8551e02efef1680475fc0f1dc1344"),
        ("50pence", "0x3cf3e8d5427aed066a7a5926980600f6c3cf87b3"),
        ("fhantom", "0x6356fb47642a028bc09df92023c35a21a0b41885"),
        ("car", "0x7c3db723f1d4d8cb9c550095203b686cb11e5c6b"),
        ("theo4", "0x56687bf447db6ffa42ffe2204a05edaa20f55839"),
    ]
)

In [None]:
# Optimized data loading strategy
# Window functions over the whole dataset cause OOM, so the work is split into
# three lazy steps: 1. market prices, 2. the user's trades, 3. a join of the two.

# Note: the glob below is relative to the analysis/ folder.
df_scan = pl.scan_parquet("../processed/trades/**/*.parquet")

# 1. Market prices: for every (market_id, nonusdc_side) pair, order the prices
#    by timestamp and keep the last one as `last_price`.
price_df = (
    df_scan
    .select("timestamp", "market_id", "nonusdc_side", "price")
    .group_by("market_id", "nonusdc_side")
    .agg(
        pl.col("price").sort_by("timestamp").last().alias("last_price"),
    )
)


In [None]:
# Wallet address under analysis; swap the key to inspect another entry of USERS.
TARGET_USER = USERS['domah']

# 2. The user's trades. Filtering on `maker` happens first (memory efficiency);
#    maker_direction / nonusdc_side are then renamed to direction / side.
user_trades = (
    df_scan
    .filter(pl.col("maker") == TARGET_USER)
    .select(
        "timestamp",
        "market_id",
        "maker_direction",
        "nonusdc_side",
        "price",
        "token_amount",
        "usd_amount",
        "transactionHash",
    )
    .rename({"maker_direction": "direction", "nonusdc_side": "side"})
)

# 3. Per (market_id, side) totals: USD and token volume split by trade direction,
#    plus the number of trades. Rows of the other direction contribute 0.0.
user_agg = user_trades.group_by("market_id", "side").agg(
    *[
        pl.when(pl.col("direction") == trade_direction)
        .then(pl.col(amount_col))
        .otherwise(0.0)
        .sum()
        .alias(out_name)
        for out_name, trade_direction, amount_col in (
            ("buy_usd", "BUY", "usd_amount"),
            ("sell_usd", "SELL", "usd_amount"),
            ("buy_tokens", "BUY", "token_amount"),
            ("sell_tokens", "SELL", "token_amount"),
        )
    ],
    pl.len().alias("trades"),
)

# 4. Attach the last traded price to each aggregate row. The left join keeps
#    every user (market_id, side) row even when no price row matches.
trader_df = user_agg.join(
    price_df,
    how="left",
    left_on=["market_id", "side"],
    right_on=["market_id", "nonusdc_side"],
)


In [None]:
# 5. PnL with clamped prices
#    - last_price above 0.98 is snapped to 1.0 and below 0.02 to 0.0
#      (presumably to treat near-certain markets as settled — confirm thresholds).
#    - cash_pnl_usd     = sell_usd - buy_usd
#    - inventory_tokens = buy_tokens - sell_tokens
#    - unrealized_usd   = inventory_tokens * last_price
#    - total_pnl_usd    = cash_pnl_usd + unrealized_usd
# Each stage is its own with_columns call because it reads columns produced by
# the stage before it.
trader_df = (
    trader_df
    .with_columns(
        pl.when(pl.col("last_price") > 0.98).then(pl.lit(1.0))
        .when(pl.col("last_price") < 0.02).then(pl.lit(0.0))
        .otherwise(pl.col("last_price"))
        .alias("last_price")
    )
    .with_columns(
        cash_pnl_usd=pl.col("sell_usd") - pl.col("buy_usd"),
        inventory_tokens=pl.col("buy_tokens") - pl.col("sell_tokens"),
    )
    .with_columns(
        unrealized_usd=pl.col("inventory_tokens") * pl.col("last_price"),
    )
    .with_columns(
        total_pnl_usd=pl.col("cash_pnl_usd") + pl.col("unrealized_usd"),
    )
)


### This is how Polymarket generates its events, and this is how you get all trades for a given user. Although it looks like we are only fetching rows where the user is the maker, that is not how it works at the contract level: the `maker` column shows each trade from that user's point of view, including the price.

In [None]:
print("Calculating Total PnL for user:", TARGET_USER)
# Materialise the lazy plan built in the cells above. A streaming collect
# (streaming=True, or engine='streaming' in newer polars) could be tried here,
# though the plain collect may be enough since the join runs on aggregated data.
result = trader_df.collect()
# Grand total across every (market_id, side) row of the collected frame.
print("Total PnL:", result.get_column("total_pnl_usd").sum())