In [None]:
# Select the matplotlib backend BEFORE pyplot and polars are imported.
# Per the original author's note, figures did not display correctly when
# polars/pyarrow was loaded first; the ordering of the lines below is deliberate.
# NOTE(review): an explicit GUI backend needs a desktop session with that toolkit
# available - confirm this is wanted here rather than the notebook's default backend.
import matplotlib
matplotlib.use('TkAgg')  # Interactive Tk-based backend (alternatives: 'QtAgg', 'Qt5Agg')
import matplotlib.pyplot as plt

# polars is imported only after the backend has been chosen.
# NOTE(review): the original comment states polars uses pyarrow internally - unverified here.
import polars as pl

In [None]:
# Read the trades export (path is relative to the working directory) into a polars DataFrame.
csv_trades = pl.read_csv(source="trades_100k.csv")

In [None]:
# Organize trades by ticker for line graph analysis

# Parse created_time into a timezone-aware datetime.
# The dtype guard keeps this cell re-runnable: csv_trades is overwritten below, so on
# a second execution the column is already a Datetime and `.str.to_datetime` would raise.
if csv_trades.schema["created_time"] != pl.Datetime:
    csv_trades = csv_trades.with_columns(
        pl.col("created_time").str.to_datetime(time_zone="America/New_York")
    )

# Order trades chronologically within each ticker (sorting is safe to repeat).
csv_trades = csv_trades.sort(["ticker", "created_time"])

# Per-ticker summary statistics, most frequently traded tickers first
ticker_summary = csv_trades.group_by("ticker").agg([
    pl.len().alias("trade_count"),
    pl.col("yes_price").mean().alias("avg_yes_price"),
    pl.col("yes_price").min().alias("min_yes_price"),
    pl.col("yes_price").max().alias("max_yes_price"),
    pl.col("count").sum().alias("total_volume")
]).sort("trade_count", descending=True)

print(f"Total unique tickers: {ticker_summary.height}")
print("\nTop 20 most traded tickers:")
print(ticker_summary.head(20))

# Example: Get trades for a specific ticker (replace with desired ticker)
# example_ticker = ticker_summary[0, "ticker"]
# ticker_trades = csv_trades.filter(pl.col("ticker") == example_ticker)
# print(f"\nTrades for {example_ticker}:")
# print(ticker_trades.select(["created_time", "yes_price", "count"]))

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Line graphs of yes-price over time for the most traded tickers.
# The grid shape decides how many tickers are plotted, so the subplot count and
# the "top N" selection cannot drift apart.
n_rows, n_cols = 3, 2
top_tickers = ticker_summary.head(n_rows * n_cols)["ticker"].to_list()

fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 12))
axes = axes.flatten()

for ax, ticker in zip(axes, top_tickers):
    # Trades for this ticker only
    ticker_trades = csv_trades.filter(pl.col("ticker") == ticker)

    # Convert to pandas for plotting. yes_price is divided by 100 to give the
    # dollar value shown on the y-axis (presumably the raw price is in cents - confirm).
    df = ticker_trades.select([
        "created_time",
        (pl.col("yes_price") / 100).alias("yes_price_dollars"),
        "count"
    ]).to_pandas()

    ax.plot(df["created_time"], df["yes_price_dollars"], marker='o', linestyle='-', markersize=3, alpha=0.7)
    ax.set_title(f"{ticker}\n({len(df)} trades)", fontsize=10, fontweight='bold')
    ax.set_xlabel("Time")
    ax.set_ylabel("Yes Price (USD)")
    ax.grid(True, alpha=0.3)

    # Without an explicit tz, DateFormatter renders ticks in rcParams["timezone"]
    # (UTC by default), which would shift the labels away from the time zone of the
    # data. Use the column's own tz; None (naive data) keeps matplotlib's default.
    time_zone = getattr(df["created_time"].dtype, "tz", None)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M', tz=time_zone))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha='right')

# Hide grid cells left over when there are fewer tickers than subplots
for unused_ax in axes[len(top_tickers):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

# The ranking comes from ticker_summary, which is sorted by trade_count (not total_volume)
print(f"\nDisplayed line graphs for top {len(top_tickers)} tickers by trade count")