# Hyperliquid Sentiment (Fear/Greed) Analysis

This notebook analyzes how market sentiment relates to trader behavior and performance on Hyperliquid.

## Scope
- Data quality checks (rows/columns, missing values, duplicates)
- Daily alignment of trade and sentiment data
- Key trader metrics (PnL, win rate, trade size, leverage proxy, activity, long/short bias)
- Fear vs Greed comparisons with bootstrap confidence intervals
- Trader segmentation (leverage, activity, consistency)
- Bonus predictive model for next-day profitability

In [None]:
from pathlib import Path
from typing import Callable

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Plot styling and DataFrame display width used by every later cell.
sns.set_theme(style='whitegrid')
pd.set_option('display.max_columns', 200)

# All paths are resolved relative to the directory the kernel was started in,
# so both input CSVs are expected to sit next to the notebook's working directory.
ROOT = Path.cwd().resolve()
TRADES_PATH = ROOT / 'historical_data.csv'
SENTIMENT_PATH = ROOT / 'fear_greed_index.csv'
OUTPUT_DIR = ROOT / 'outputs'
TABLES_DIR = OUTPUT_DIR / 'tables'
CHARTS_DIR = OUTPUT_DIR / 'charts'
CLEANED_DIR = OUTPUT_DIR / 'cleaned'

# Create the output tree up front; exist_ok keeps the cell safe to re-run.
for path in (OUTPUT_DIR, TABLES_DIR, CHARTS_DIR, CLEANED_DIR):
    path.mkdir(parents=True, exist_ok=True)

print(f'Working directory: {ROOT}')

In [None]:
def profile_dataframe(name: str, df: pd.DataFrame) -> dict[str, object]:
    """Summarize basic data-quality facts about a DataFrame.

    Args:
        name: Label stored under the ``'dataset'`` key of the result.
        df: Frame to profile.

    Returns:
        A dict with the dataset label, row/column counts, number of fully
        duplicated rows, total count of missing cells, and the share of cells
        that are missing (``0.0`` when the frame has no cells at all).
    """
    n_rows, n_cols = df.shape
    total_cells = n_rows * n_cols
    total_missing = int(df.isna().sum().sum())
    # An empty frame has zero cells; report 0.0 instead of dividing by zero.
    missing_pct = float(total_missing / total_cells) if total_cells else 0.0
    return {
        'dataset': name,
        'rows': int(n_rows),
        'columns': int(n_cols),
        'duplicate_rows': int(df.duplicated().sum()),
        'total_missing_cells': total_missing,
        'missing_cell_pct': missing_pct,
    }


def bootstrap_diff(
    fear: pd.Series,
    greed: pd.Series,
    stat_fn: Callable[[np.ndarray], float],
    n_boot: int = 5000,
    seed: int = 42,
) -> dict[str, float]:
    """Bootstrap the difference ``stat_fn(fear) - stat_fn(greed)``.

    Each sample is resampled with replacement at its own size, independently of
    the other sample, ``n_boot`` times.

    Args:
        fear: Observations for the Fear regime; NaNs are dropped.
        greed: Observations for the Greed regime; NaNs are dropped.
        stat_fn: Statistic applied to each resampled 1-D array.
        n_boot: Number of bootstrap replicates.
        seed: Seed for the NumPy random generator (results are reproducible).

    Returns:
        A dict with ``'fear_minus_greed'`` (mean of the bootstrap differences)
        and the 2.5% / 97.5% quantiles as ``'ci_low_95'`` / ``'ci_high_95'``.
        All three values are NaN when either sample has no non-missing
        observations or when ``n_boot`` is less than 1.
    """
    fear_values = fear.dropna().to_numpy()
    greed_values = greed.dropna().to_numpy()

    # Nothing to resample: return an explicit "undefined" result instead of
    # letting the statistic / quantile calls fail or warn on empty arrays.
    if fear_values.size == 0 or greed_values.size == 0 or n_boot < 1:
        undefined = float('nan')
        return {
            'fear_minus_greed': undefined,
            'ci_low_95': undefined,
            'ci_high_95': undefined,
        }

    rng = np.random.default_rng(seed)

    diffs = np.empty(n_boot, dtype=float)
    for idx in range(n_boot):
        # Draw order (fear first, then greed) is kept fixed so that a given
        # seed always produces the same replicates.
        fear_sample = rng.choice(fear_values, size=len(fear_values), replace=True)
        greed_sample = rng.choice(greed_values, size=len(greed_values), replace=True)
        diffs[idx] = stat_fn(fear_sample) - stat_fn(greed_sample)

    ci_low, ci_high = np.quantile(diffs, [0.025, 0.975])
    return {
        'fear_minus_greed': float(np.mean(diffs)),
        'ci_low_95': float(ci_low),
        'ci_high_95': float(ci_high),
    }


def expected_shortfall_10(values: pd.Series) -> float:
    """Mean of the observations at or below the 10th percentile.

    Missing values are ignored. Returns NaN when no observations remain.
    """
    observed = values.dropna()
    if len(observed) == 0:
        return float('nan')
    cutoff = observed.quantile(0.10)
    tail = observed.loc[observed <= cutoff]
    return float(tail.mean())

## Part A: Data Preparation

Load both datasets, check quality, normalize timestamps, align on daily date, and create key metrics.

In [None]:
# Read both raw inputs exactly as stored on disk.
trades_raw = pd.read_csv(TRADES_PATH)
sentiment_raw = pd.read_csv(SENTIMENT_PATH)

# One quality-profile row per dataset, persisted alongside the other tables.
quality_df = pd.DataFrame(
    [
        profile_dataframe(label, frame)
        for label, frame in (
            ('historical_data', trades_raw),
            ('fear_greed_index', sentiment_raw),
        )
    ]
)
quality_df.to_csv(TABLES_DIR / 'data_quality_summary.csv', index=False)

print('Data quality summary:')
display(quality_df)

# Column listings for both inputs.
for heading, frame in (
    ('\nTrades columns:', trades_raw),
    ('\nSentiment columns:', sentiment_raw),
):
    print(heading)
    print(frame.columns.tolist())

# Timestamp integrity check: compare how many distinct values each
# timestamp column carries.
print('\nTimestamp uniqueness check:')
for label, column in (
    ('Timestamp unique count:', 'Timestamp'),
    ('Timestamp IST unique count:', 'Timestamp IST'),
):
    print(label, trades_raw[column].nunique())


In [None]:
def prepare_data(trades_raw: pd.DataFrame, sentiment_raw: pd.DataFrame):
    """Align trades with daily sentiment and build per-account daily metrics.

    Args:
        trades_raw: Raw trade rows. Columns read here: ``Timestamp IST``,
            ``Timestamp``, ``Side``, ``Size USD``, ``Size Tokens``,
            ``Start Position``, ``Execution Price``, ``Closed PnL``,
            ``Account``, ``Trade ID`` and ``Fee``.
        sentiment_raw: Raw sentiment rows with ``date``, ``value`` and
            ``classification`` columns.

    Returns:
        ``(merged, daily, sentiment)`` where

        * ``merged`` has exactly one row per input trade, enriched with the
          sentiment columns and derived per-trade fields;
        * ``daily`` has one row per (Account, date, sentiment_bucket) with
          PnL, activity, leverage-proxy, long/short and drawdown metrics;
        * ``sentiment`` is the cleaned sentiment table with one row per
          valid calendar date.

    Neither input frame is modified.
    """
    trades = trades_raw.copy()
    sentiment = sentiment_raw.copy()

    # Unparseable timestamps become NaT rather than raising.
    trades['timestamp_ist'] = pd.to_datetime(
        trades['Timestamp IST'], format='%d-%m-%Y %H:%M', errors='coerce'
    )
    # Parsed for reference only; the join key below is derived from the IST column.
    trades['timestamp_ms'] = pd.to_datetime(
        trades['Timestamp'], unit='ms', errors='coerce', utc=True
    )
    trades['date'] = trades['timestamp_ist'].dt.date

    sentiment['date'] = pd.to_datetime(sentiment['date'], errors='coerce').dt.date
    # Collapse the "Extreme" classes into their base bucket.
    sentiment['sentiment_bucket'] = sentiment['classification'].replace({
        'Extreme Fear': 'Fear',
        'Extreme Greed': 'Greed',
    })

    # Keep exactly one sentiment row per valid date before joining:
    #  * a duplicated date would duplicate every trade of that day in the left
    #    merge and inflate all downstream sums and counts;
    #  * pandas matches null join keys with each other, so a sentiment row with
    #    an unparseable date would attach to trades with unparseable timestamps.
    # When a date repeats, the last row in file order wins.
    sentiment = (
        sentiment.dropna(subset=['date'])
        .drop_duplicates(subset='date', keep='last')
        .reset_index(drop=True)
    )

    merged = trades.merge(
        sentiment[['date', 'value', 'classification', 'sentiment_bucket']],
        on='date',
        how='left',
    )

    # +1 for BUY; every other value (including a missing side) is treated as a sell.
    side = merged['Side'].str.upper()
    merged['side_sign'] = np.where(side.eq('BUY'), 1, -1)
    merged['signed_size_usd'] = merged['Size USD'] * merged['side_sign']
    merged['signed_size_tokens'] = merged['Size Tokens'] * merged['side_sign']

    # Position notionals before/after the trade, valued at the execution price.
    merged['pos_before_usd'] = merged['Start Position'].abs() * merged['Execution Price']
    merged['pos_after_tokens'] = merged['Start Position'] + merged['signed_size_tokens']
    merged['pos_after_usd'] = merged['pos_after_tokens'].abs() * merged['Execution Price']

    # Leverage proxy because explicit account-level leverage/equity is not provided:
    # ratio of post-trade to pre-trade notional. The +1.0 keeps the ratio finite
    # when the starting position is zero; the result is capped to [0, 50].
    merged['leverage_proxy'] = (
        merged['pos_after_usd'] / (merged['pos_before_usd'] + 1.0)
    ).clip(lower=0, upper=50)

    # A trade counts as "realized" when it carries a non-zero closed PnL.
    merged['is_realized'] = (merged['Closed PnL'] != 0).astype(int)
    merged['is_win'] = (merged['Closed PnL'] > 0).astype(int)
    merged['is_loss'] = (merged['Closed PnL'] < 0).astype(int)

    # dropna=False keeps account-days whose date has no sentiment match.
    daily = (
        merged.groupby(['Account', 'date', 'sentiment_bucket'], dropna=False, as_index=False)
        .agg(
            trades=('Trade ID', 'count'),
            daily_pnl_usd=('Closed PnL', 'sum'),
            avg_trade_size_usd=('Size USD', 'mean'),
            total_notional_usd=('Size USD', 'sum'),
            realized_trades=('is_realized', 'sum'),
            wins=('is_win', 'sum'),
            losses=('is_loss', 'sum'),
            buy_notional_usd=('signed_size_usd', lambda x: x[x > 0].sum()),
            sell_notional_usd=('signed_size_usd', lambda x: -x[x < 0].sum()),
            avg_leverage_proxy=('leverage_proxy', 'mean'),
            p90_leverage_proxy=('leverage_proxy', lambda x: x.quantile(0.90)),
            total_fees_usd=('Fee', 'sum'),
        )
        .sort_values(['Account', 'date'])
    )

    # Win rate is undefined (NaN) on days without any realized trade.
    daily['win_rate'] = np.where(
        daily['realized_trades'] > 0,
        daily['wins'] / daily['realized_trades'],
        np.nan,
    )
    # Ratio is NaN on days with no sell notional; the bias below stays finite
    # thanks to the small epsilon in the denominator.
    daily['long_short_ratio'] = daily['buy_notional_usd'] / daily['sell_notional_usd'].replace(0, np.nan)
    daily['net_long_bias'] = (
        (daily['buy_notional_usd'] - daily['sell_notional_usd'])
        / (daily['buy_notional_usd'] + daily['sell_notional_usd'] + 1e-9)
    )

    # Rows are already sorted by (Account, date), so the cumulative operations
    # run in chronological order within each account. Drawdown is <= 0.
    daily['cum_pnl_usd'] = daily.groupby('Account')['daily_pnl_usd'].cumsum()
    daily['running_peak_usd'] = daily.groupby('Account')['cum_pnl_usd'].cummax()
    daily['drawdown_usd'] = daily['cum_pnl_usd'] - daily['running_peak_usd']

    return merged, daily, sentiment


# Build the enriched trade table and the per-account daily metrics.
merged, daily, sentiment_clean = prepare_data(trades_raw, sentiment_raw)

# Persist both cleaned datasets for reuse outside the notebook.
for frame, filename in (
    (merged, 'trades_enriched.csv'),
    (daily, 'daily_account_metrics.csv'),
):
    frame.to_csv(CLEANED_DIR / filename, index=False)

# Trades whose date found no sentiment record.
unmatched_rows = int(merged['sentiment_bucket'].isna().sum())

print('Prepared datasets:')
print('Merged rows:', len(merged))
print('Daily account rows:', len(daily))
print('Unmatched sentiment rows:', unmatched_rows)

display(daily.head())

## Part B: Fear vs Greed Analysis

Answer whether performance and behavior differ between Fear and Greed regimes, and quantify those differences.

In [None]:
def fear_greed_tables(daily: pd.DataFrame):
    """Compare account-day performance and behavior between Fear and Greed.

    Args:
        daily: Per-account daily metrics carrying a ``sentiment_bucket`` column.

    Returns:
        ``(fg_daily, performance, behavior, tests)``: the Fear/Greed subset of
        ``daily``, one performance row and one behavior row per bucket, and a
        table of bootstrapped Fear-minus-Greed differences with 95% intervals.
    """
    in_scope = daily['sentiment_bucket'].isin(['Fear', 'Greed'])
    fg_daily = daily.loc[in_scope].copy()

    # Named aggregations: output column -> (source column, reducer).
    performance_spec = {
        'account_days': ('Account', 'count'),
        'traders': ('Account', 'nunique'),
        'mean_daily_pnl_usd': ('daily_pnl_usd', 'mean'),
        'median_daily_pnl_usd': ('daily_pnl_usd', 'median'),
        'positive_day_rate': ('daily_pnl_usd', lambda s: (s > 0).mean()),
        'mean_win_rate': ('win_rate', 'mean'),
        'mean_drawdown_usd': ('drawdown_usd', 'mean'),
        'p10_drawdown_usd': ('drawdown_usd', lambda s: s.quantile(0.10)),
        'p05_daily_pnl_usd': ('daily_pnl_usd', lambda s: s.quantile(0.05)),
        'es10_daily_pnl_usd': ('daily_pnl_usd', expected_shortfall_10),
    }
    performance = (
        fg_daily.groupby('sentiment_bucket', as_index=False)
        .agg(**performance_spec)
        .sort_values('sentiment_bucket')
    )

    behavior_spec = {
        'mean_trades_per_account_day': ('trades', 'mean'),
        'median_trades_per_account_day': ('trades', 'median'),
        'mean_trade_size_usd': ('avg_trade_size_usd', 'mean'),
        'median_trade_size_usd': ('avg_trade_size_usd', 'median'),
        'mean_total_notional_usd': ('total_notional_usd', 'mean'),
        'mean_leverage_proxy': ('avg_leverage_proxy', 'mean'),
        'median_leverage_proxy': ('avg_leverage_proxy', 'median'),
        # Infinite ratios are treated as missing before summarizing.
        'median_long_short_ratio': (
            'long_short_ratio',
            lambda s: s.replace([np.inf, -np.inf], np.nan).median(),
        ),
        # Capped to [0, 5] so a few extreme ratios do not dominate the mean.
        'mean_long_short_ratio_capped': (
            'long_short_ratio',
            lambda s: s.replace([np.inf, -np.inf], np.nan).clip(0, 5).mean(),
        ),
        'mean_net_long_bias': ('net_long_bias', 'mean'),
        'share_net_long_days': ('net_long_bias', lambda s: (s > 0).mean()),
    }
    behavior = (
        fg_daily.groupby('sentiment_bucket', as_index=False)
        .agg(**behavior_spec)
        .sort_values('sentiment_bucket')
    )

    by_regime = {
        regime: fg_daily.loc[fg_daily['sentiment_bucket'] == regime]
        for regime in ('Fear', 'Greed')
    }

    # (source column, reported metric name, statistic to bootstrap)
    metric_specs = [
        ('daily_pnl_usd', 'median_daily_pnl_usd', np.median),
        ('win_rate', 'mean_win_rate', np.mean),
        ('drawdown_usd', 'p10_drawdown_usd', lambda x: float(np.quantile(x, 0.10))),
        ('trades', 'mean_trades_per_account_day', np.mean),
        ('avg_trade_size_usd', 'mean_trade_size_usd', np.mean),
        ('avg_leverage_proxy', 'mean_leverage_proxy', np.mean),
        ('net_long_bias', 'mean_net_long_bias', np.mean),
    ]

    def _bootstrap_row(column, label, fn):
        # One output row: Fear-minus-Greed difference and its 95% interval.
        stats = bootstrap_diff(by_regime['Fear'][column], by_regime['Greed'][column], fn)
        return {
            'metric': label,
            'fear_minus_greed': stats['fear_minus_greed'],
            'ci_low_95': stats['ci_low_95'],
            'ci_high_95': stats['ci_high_95'],
        }

    tests = [_bootstrap_row(column, label, fn) for column, label, fn in metric_specs]

    return fg_daily, performance, behavior, pd.DataFrame(tests)


fg_daily, performance_df, behavior_df, tests_df = fear_greed_tables(daily)

# (printed heading, table, output file) for each Fear/Greed result.
fear_greed_outputs = (
    ('Performance summary:', performance_df, 'performance_fear_vs_greed.csv'),
    ('\nBehavior summary:', behavior_df, 'behavior_fear_vs_greed.csv'),
    ('\nBootstrap differences (Fear - Greed):', tests_df, 'bootstrap_differences.csv'),
)

# Persist every table first, then show them in the same order.
for heading, table, filename in fear_greed_outputs:
    table.to_csv(TABLES_DIR / filename, index=False)

for heading, table, filename in fear_greed_outputs:
    print(heading)
    display(table)


## Segment Analysis

Build segments for leverage, activity, and consistency; compare each segment across sentiment regimes.

In [None]:
def segment_tables(daily: pd.DataFrame):
    """Segment traders by leverage proxy, activity and consistency.

    Each trader is profiled over their Fear/Greed account-days and placed in
    the high or low group of each dimension using that dimension's 70th
    percentile across traders (values at or above the cutoff are "high").

    Args:
        daily: Per-account daily metrics carrying a ``sentiment_bucket`` column.

    Returns:
        ``(trader_profile, segment_summary, thresholds)``: one profile row per
        account, performance per (sentiment bucket, segment) for every segment
        type, and the three percentile cutoffs that were applied.
    """
    fg_daily = daily.loc[daily['sentiment_bucket'].isin(['Fear', 'Greed'])].copy()

    profile_spec = {
        'active_days': ('date', 'nunique'),
        'total_trades': ('trades', 'sum'),
        'avg_trades_per_day': ('trades', 'mean'),
        'avg_trade_size_usd': ('avg_trade_size_usd', 'mean'),
        'avg_leverage_proxy': ('avg_leverage_proxy', 'mean'),
        'positive_day_rate': ('daily_pnl_usd', lambda s: (s > 0).mean()),
        'total_pnl_usd': ('daily_pnl_usd', 'sum'),
        'mean_daily_pnl_usd': ('daily_pnl_usd', 'mean'),
        'pnl_volatility_usd': ('daily_pnl_usd', 'std'),
        'mean_win_rate': ('win_rate', 'mean'),
    }
    trader_profile = (
        fg_daily.groupby('Account', as_index=False)
        .agg(**profile_spec)
        .sort_values('Account')
    )

    # Missing volatility / win rate is recorded as 0 in the profile.
    for column in ('pnl_volatility_usd', 'mean_win_rate'):
        trader_profile[column] = trader_profile[column].fillna(0)

    # (segment column, profile metric, label at/above cutoff, label below cutoff)
    segment_rules = (
        ('leverage_segment', 'avg_leverage_proxy', 'High leverage-proxy', 'Low leverage-proxy'),
        ('activity_segment', 'avg_trades_per_day', 'Frequent', 'Infrequent'),
        ('consistency_segment', 'positive_day_rate', 'Consistent winners', 'Inconsistent'),
    )

    cutoffs = {
        segment_col: float(trader_profile[metric_col].quantile(0.70))
        for segment_col, metric_col, high_label, low_label in segment_rules
    }
    for segment_col, metric_col, high_label, low_label in segment_rules:
        trader_profile[segment_col] = np.where(
            trader_profile[metric_col] >= cutoffs[segment_col],
            high_label,
            low_label,
        )

    segment_cols = [rule[0] for rule in segment_rules]
    segmented = fg_daily.merge(
        trader_profile[['Account'] + segment_cols],
        on='Account',
        how='left',
    )

    summary_spec = {
        'account_days': ('Account', 'count'),
        'traders': ('Account', 'nunique'),
        'mean_daily_pnl_usd': ('daily_pnl_usd', 'mean'),
        'median_daily_pnl_usd': ('daily_pnl_usd', 'median'),
        'positive_day_rate': ('daily_pnl_usd', lambda s: (s > 0).mean()),
        'mean_win_rate': ('win_rate', 'mean'),
        'mean_trades': ('trades', 'mean'),
        'mean_trade_size_usd': ('avg_trade_size_usd', 'mean'),
        'mean_leverage_proxy': ('avg_leverage_proxy', 'mean'),
        'mean_net_long_bias': ('net_long_bias', 'mean'),
        'es10_daily_pnl_usd': ('daily_pnl_usd', expected_shortfall_10),
    }

    def _summarise(segment_col):
        # Performance per (sentiment bucket, segment value) for one segment type.
        grouped = (
            segmented.groupby(['sentiment_bucket', segment_col], as_index=False)
            .agg(**summary_spec)
            .rename(columns={segment_col: 'segment'})
        )
        grouped['segment_type'] = segment_col
        return grouped

    segment_summary = pd.concat(
        [_summarise(segment_col) for segment_col in segment_cols],
        ignore_index=True,
    )

    thresholds = pd.DataFrame({
        'threshold_name': ['leverage_q70', 'activity_q70', 'consistency_q70'],
        'value': [
            cutoffs['leverage_segment'],
            cutoffs['activity_segment'],
            cutoffs['consistency_segment'],
        ],
    })

    return trader_profile, segment_summary, thresholds


trader_profile_df, segment_summary_df, thresholds_df = segment_tables(daily)

# Output file -> table, written in this order.
segment_outputs = {
    'trader_profiles.csv': trader_profile_df,
    'segment_performance.csv': segment_summary_df,
    'segment_thresholds.csv': thresholds_df,
}
for filename, table in segment_outputs.items():
    table.to_csv(TABLES_DIR / filename, index=False)

print('Segment thresholds:')
display(thresholds_df)

# Only the first rows are shown; the full table is in the CSV.
print('\nSegment performance (sample):')
display(segment_summary_df.head(12))


## Bonus: Predictive Model

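Predict whether each account's next active trading day is profitable, using the current day's sentiment (index value and bucket) plus the account's behavior metrics from its previous active day.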

In [None]:
def train_predictive_model(daily: pd.DataFrame, sentiment: pd.DataFrame):
    """Fit a random forest that predicts whether an account's next row is profitable.

    The target is the sign of the account's *next available* daily PnL in the
    modelling frame (the next active day, not necessarily the next calendar
    day). Features are the current day's sentiment value and bucket plus the
    account's behavior metrics from its previous row. Rows are split by date:
    the earliest ~80% of distinct dates train the model, the rest test it.

    Args:
        daily: Per-account daily metrics (must include ``Account``, ``date``,
            ``sentiment_bucket`` and the lagged metric columns).
        sentiment: Sentiment table with ``date`` and ``value`` columns.

    Returns:
        ``(metrics_df, importance_df, roc_df)``: a one-row table of split sizes
        and test metrics, feature importances sorted descending, and the ROC
        curve points. ``test_auc`` is NaN when the test window contains a
        single class.

    Raises:
        ValueError: If fewer than two distinct dates are available, so no
            chronological train/test split exists.
    """
    # One sentiment value per date, so the left merge cannot duplicate account-days.
    sentiment_values = sentiment[['date', 'value']].drop_duplicates(subset='date', keep='last')
    model_df = daily.merge(sentiment_values, on='date', how='left')
    model_df = model_df[model_df['sentiment_bucket'].isin(['Fear', 'Greed', 'Neutral'])].copy()
    model_df = model_df.sort_values(['Account', 'date'])

    # Target: PnL of the account's following row. The last row of each account
    # has no target and is dropped below.
    model_df['next_day_pnl_usd'] = model_df.groupby('Account')['daily_pnl_usd'].shift(-1)
    model_df['next_day_profitable'] = (model_df['next_day_pnl_usd'] > 0).astype(int)

    lag_cols = [
        'daily_pnl_usd',
        'trades',
        'avg_trade_size_usd',
        'total_notional_usd',
        'avg_leverage_proxy',
        'net_long_bias',
    ]
    for col in lag_cols:
        model_df[f'lag1_{col}'] = model_df.groupby('Account')[col].shift(1)

    model_df = model_df.dropna(subset=['next_day_pnl_usd']).copy()

    features = [
        'value',
        'sentiment_bucket',
        'lag1_daily_pnl_usd',
        'lag1_trades',
        'lag1_avg_trade_size_usd',
        'lag1_total_notional_usd',
        'lag1_avg_leverage_proxy',
        'lag1_net_long_bias',
    ]

    use_df = model_df.dropna(subset=['date']).copy()
    unique_dates = sorted(use_df['date'].unique())
    if len(unique_dates) < 2:
        raise ValueError(
            'train_predictive_model needs at least two distinct dates to build a '
            f'chronological train/test split; got {len(unique_dates)}.'
        )
    # With >= 2 dates this index is always in [1, len - 1], so both sides are non-empty.
    split_idx = int(len(unique_dates) * 0.80)
    split_date = unique_dates[split_idx]

    train_mask = use_df['date'] < split_date
    test_mask = use_df['date'] >= split_date

    X_train = use_df.loc[train_mask, features]
    y_train = use_df.loc[train_mask, 'next_day_profitable']
    X_test = use_df.loc[test_mask, features]
    y_test = use_df.loc[test_mask, 'next_day_profitable']

    num_cols = [c for c in features if c != 'sentiment_bucket']
    cat_cols = ['sentiment_bucket']

    preprocessor = ColumnTransformer([
        (
            'num',
            Pipeline([
                # First row of each account has no lag values; impute with the median.
                ('imputer', SimpleImputer(strategy='median')),
                ('scaler', StandardScaler()),
            ]),
            num_cols,
        ),
        (
            'cat',
            Pipeline([
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('encoder', OneHotEncoder(handle_unknown='ignore')),
            ]),
            cat_cols,
        ),
    ])

    model = Pipeline([
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(
            n_estimators=400,
            max_depth=6,
            min_samples_leaf=10,
            class_weight='balanced',
            random_state=42,
        )),
    ])

    model.fit(X_train, y_train)

    fitted_preprocessor = model.named_steps['preprocessor']
    fitted_classifier = model.named_steps['classifier']

    # Locate the probability column of the positive class instead of assuming it
    # is column 1: when the training target has a single class, predict_proba
    # returns only one column.
    all_proba = model.predict_proba(X_test)
    positive_cols = np.flatnonzero(fitted_classifier.classes_ == 1)
    if positive_cols.size:
        proba = all_proba[:, positive_cols[0]]
    else:
        proba = np.zeros(len(X_test))
    preds = (proba >= 0.50).astype(int)

    # ROC AUC is undefined when the test window holds a single class;
    # roc_auc_score raises in that case, so report NaN instead.
    if y_test.nunique() < 2:
        test_auc = float('nan')
    else:
        test_auc = roc_auc_score(y_test, proba)

    metrics_df = pd.DataFrame([{
        'train_rows': len(X_train),
        'test_rows': len(X_test),
        'split_date': split_date,
        'test_auc': test_auc,
        'test_accuracy': accuracy_score(y_test, preds),
        'test_precision': precision_score(y_test, preds, zero_division=0),
        'test_recall': recall_score(y_test, preds, zero_division=0),
        'test_f1': f1_score(y_test, preds, zero_division=0),
        'train_positive_rate': float(y_train.mean()),
        'test_positive_rate': float(y_test.mean()),
    }])

    # Feature names in transformer output order: numeric columns first, then
    # the one-hot encoded sentiment buckets.
    cat_names = (
        fitted_preprocessor.named_transformers_['cat']
        .named_steps['encoder']
        .get_feature_names_out(cat_cols)
        .tolist()
    )
    feature_names = num_cols + cat_names

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': fitted_classifier.feature_importances_,
    }).sort_values('importance', ascending=False)

    fpr, tpr, thresholds = roc_curve(y_test, proba)
    roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'threshold': thresholds})

    return metrics_df, importance_df, roc_df


model_metrics_df, feature_importance_df, roc_df = train_predictive_model(daily, sentiment_clean)

# Output file -> table, written in this order.
model_outputs = {
    'predictive_model_metrics.csv': model_metrics_df,
    'predictive_feature_importance.csv': feature_importance_df,
    'predictive_model_roc_points.csv': roc_df,
}
for filename, table in model_outputs.items():
    table.to_csv(TABLES_DIR / filename, index=False)

print('Model metrics:')
display(model_metrics_df)

# Importances are already sorted descending, so head() is the top of the ranking.
print('\nTop features:')
display(feature_importance_df.head(10))


## Charts and Evidence

Create charts used to support the findings and save them for submission.

In [None]:
# Display order of the sentiment buckets, shared by the charts below.
order = ['Fear', 'Greed']

# 1) Performance chart
# Left panel: distribution of account-day PnL per bucket. Values are clipped
# to each bucket's own 5th-95th percentile range so outliers do not flatten
# the boxes.
plot_data = fg_daily[['sentiment_bucket', 'daily_pnl_usd']].copy()
plot_data['winsorized_pnl'] = plot_data.groupby('sentiment_bucket')['daily_pnl_usd'].transform(
    lambda s: s.clip(s.quantile(0.05), s.quantile(0.95))
)

fig, axes = plt.subplots(1, 2, figsize=(13, 5))
sns.boxplot(
    data=plot_data,
    x='sentiment_bucket',
    y='winsorized_pnl',
    hue='sentiment_bucket',
    order=order,
    ax=axes[0],
    dodge=False,
    palette=['#d66d6d', '#73a3d6'],
    legend=False,
)
axes[0].set_title('Account-Day PnL by Sentiment (Winsorized 5%-95%)')
axes[0].set_xlabel('Sentiment')
axes[0].set_ylabel('Daily PnL (USD)')

# Right panel: positive-day rate (left axis) next to ES10 (right axis).
# The two bar series are offset by half a bar width so they sit side by side.
perf_plot = performance_df.set_index('sentiment_bucket').reindex(order)
x = np.arange(len(order))
width = 0.35
axes[1].bar(
    x - width / 2,
    perf_plot['positive_day_rate'],
    width=width,
    color='#4c78a8',
    label='Positive day rate',
)
axes[1].set_ylabel('Positive Day Rate')
axes[1].set_xticks(x)
axes[1].set_xticklabels(order)
axes[1].set_title('Hit Rate vs Tail-Loss Proxy')

# Second y-axis sharing the same x-axis, because ES10 is in USD while the
# positive-day rate is a fraction.
ax2 = axes[1].twinx()
ax2.bar(
    x + width / 2,
    perf_plot['es10_daily_pnl_usd'],
    width=width,
    color='#e15759',
    alpha=0.8,
    label='ES10 daily PnL',
)
ax2.set_ylabel('Expected Shortfall 10% (USD)')

# Each axis tracks its own artists; merge both sets into a single legend.
h1, l1 = axes[1].get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
axes[1].legend(h1 + h2, l1 + l2, loc='upper right')
fig.tight_layout()
fig.savefig(CHARTS_DIR / 'performance_fear_vs_greed.png', dpi=180)
plt.show()

# 2) Behavior chart
# One bar panel per behavior metric, Fear next to Greed.
behavior_plot = behavior_df.set_index('sentiment_bucket').reindex(order).reset_index()
metric_specs = [
    ('mean_trades_per_account_day', 'Trades / Account-Day'),
    ('mean_trade_size_usd', 'Avg Trade Size (USD)'),
    ('mean_leverage_proxy', 'Leverage Proxy'),
    ('mean_net_long_bias', 'Net Long Bias'),
]

# Keyword arguments shared by every panel.
bar_style = dict(
    x='sentiment_bucket',
    hue='sentiment_bucket',
    order=order,
    dodge=False,
    palette=['#d66d6d', '#73a3d6'],
    legend=False,
)

fig, axes = plt.subplots(2, 2, figsize=(13, 9))
for panel_idx, (metric, title) in enumerate(metric_specs):
    axis = axes.flatten()[panel_idx]
    sns.barplot(data=behavior_plot, y=metric, ax=axis, **bar_style)
    axis.set_title(title)
    axis.set_xlabel('Sentiment')
    axis.set_ylabel('')

fig.tight_layout()
fig.savefig(CHARTS_DIR / 'behavior_fear_vs_greed.png', dpi=180)
plt.show()

# 3) Segment heatmap
# Rows are "<Segment type> | <segment>", columns are sentiment buckets,
# cells are mean daily PnL.
segment_types = ['leverage_segment', 'activity_segment', 'consistency_segment']
heatmap_data = segment_summary_df.loc[
    segment_summary_df['segment_type'].isin(segment_types)
].copy()

# 'leverage_segment' -> 'Leverage', and likewise for the other types.
type_label = (
    heatmap_data['segment_type']
    .str.replace('_segment', '', regex=False)
    .str.replace('_', ' ', regex=False)
    .str.title()
)
heatmap_data['row_name'] = type_label + ' | ' + heatmap_data['segment']

heatmap_pivot = heatmap_data.pivot(
    index='row_name',
    columns='sentiment_bucket',
    values='mean_daily_pnl_usd',
)
heatmap_pivot = heatmap_pivot.reindex(columns=order)

# Diverging colormap centered on zero so gains and losses are told apart by color.
heatmap_style = dict(
    annot=True,
    fmt='.0f',
    cmap='RdYlGn',
    center=0,
    cbar_kws={'label': 'Mean Daily PnL (USD)'},
)

fig, ax = plt.subplots(figsize=(9, 5))
sns.heatmap(heatmap_pivot, ax=ax, **heatmap_style)
ax.set_title('Segment Performance by Sentiment')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Segment')
fig.tight_layout()
fig.savefig(CHARTS_DIR / 'segment_performance_heatmap.png', dpi=180)
plt.show()

# 4) ROC chart
# Model curve against the diagonal that a random classifier would trace.
fig, ax = plt.subplots(figsize=(7, 5))

model_line = dict(color='#4c78a8', linewidth=2, label='Model ROC')
baseline_line = dict(linestyle='--', color='gray', linewidth=1, label='Random baseline')
ax.plot(roc_df['fpr'], roc_df['tpr'], **model_line)
ax.plot([0, 1], [0, 1], **baseline_line)

ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Next-Day Profitability Model ROC')
ax.legend(loc='lower right')

fig.tight_layout()
fig.savefig(CHARTS_DIR / 'predictive_model_roc.png', dpi=180)
plt.show()

## Part C: Actionable Output

Summarize findings into strategy rules of thumb.

In [None]:
perf = performance_df.set_index('sentiment_bucket')
beh = behavior_df.set_index('sentiment_bucket')
seg_focus = segment_summary_df[segment_summary_df['segment_type'].isin(['leverage_segment', 'activity_segment'])].copy()
seg_pivot = seg_focus.pivot_table(index=['segment_type', 'segment'], columns='sentiment_bucket', values='mean_daily_pnl_usd')


def _direction(left: float, right: float, above: str, below: str, level: str) -> str:
    """Return the wording that matches how ``left`` compares with ``right``."""
    if left > right:
        return above
    if left < right:
        return below
    # Ties, and comparisons involving NaN, fall through to the neutral wording.
    return level


def _bias_side(bias: float) -> str:
    """Name the side of a net long bias value: 'long', 'short' or 'flat'."""
    if bias > 0:
        return 'long'
    if bias < 0:
        return 'short'
    return 'flat'


# The wording of every insight is derived from the numbers it quotes, so the
# saved summary cannot contradict its own figures when the data changes.
median_pnl = perf['median_daily_pnl_usd']
es10 = perf['es10_daily_pnl_usd']
trades_per_day = beh['mean_trades_per_account_day']
net_bias = beh['mean_net_long_bias']
high_lev = seg_pivot.loc[('leverage_segment', 'High leverage-proxy')]
low_lev = seg_pivot.loc[('leverage_segment', 'Low leverage-proxy')]

pnl_word = _direction(median_pnl['Greed'], median_pnl['Fear'], 'higher', 'lower', 'no different')
# A lower (more negative) ES10 means larger average losses in the worst 10% of days.
tail_word = _direction(es10['Fear'], es10['Greed'], 'better', 'worse', 'similar')
activity_word = _direction(trades_per_day['Fear'], trades_per_day['Greed'], 'more', 'less', 'equally')

fear_side = _bias_side(net_bias['Fear'])
greed_side = _bias_side(net_bias['Greed'])
if fear_side != greed_side:
    bias_clause = (
        f"and mean net bias flips {greed_side}->{fear_side} from Greed to Fear "
        f"({net_bias['Greed']:+.3f} vs {net_bias['Fear']:+.3f})"
    )
else:
    bias_clause = (
        f"while mean net bias stays {fear_side} in both regimes "
        f"({net_bias['Fear']:+.3f} in Fear vs {net_bias['Greed']:+.3f} in Greed)"
    )

lev_word = {
    regime: _direction(high_lev[regime], low_lev[regime], 'outperforms', 'underperforms', 'matches')
    for regime in ('Fear', 'Greed')
}
if lev_word['Fear'] == lev_word['Greed']:
    lev_lead = f"High leverage-proxy segment {lev_word['Fear']} low leverage-proxy in both regimes: "
else:
    lev_lead = (
        f"High leverage-proxy segment {lev_word['Fear']} low leverage-proxy in Fear "
        f"but {lev_word['Greed']} it in Greed: "
    )

insights = [
    f"Median account-day PnL is {pnl_word} in Greed ({median_pnl['Greed']:,.2f}) than Fear ({median_pnl['Fear']:,.2f}).",
    f"Fear has {tail_word} downside tail risk (ES10 {es10['Fear']:,.2f} vs Greed {es10['Greed']:,.2f}).",
    f"Traders are {activity_word} active during Fear ({trades_per_day['Fear']:.1f} vs {trades_per_day['Greed']:.1f} trades/day) {bias_clause}.",
    (
        lev_lead
        + f"Fear {high_lev['Fear']:,.0f} vs "
        f"{low_lev['Fear']:,.0f}; "
        f"Greed {high_lev['Greed']:,.0f} vs "
        f"{low_lev['Greed']:,.0f}."
    ),
]

# Editorial recommendations: static text, not derived from the tables above.
# Re-read them against the insights whenever the input data changes.
strategy_rules = [
    "During Fear days, reduce exposure expansion for high leverage-proxy traders and cut position size before increasing frequency.",
    "Increase trade frequency only for frequent/consistent segments; infrequent traders should avoid sentiment-driven overtrading.",
]

print('Insights:')
for i, item in enumerate(insights, 1):
    print(f"{i}. {item}")

print('\nStrategy rules of thumb:')
for i, item in enumerate(strategy_rules, 1):
    print(f"{i}. {item}")

# Optional: persist concise write-up
summary_path = OUTPUT_DIR / 'summary.md'
summary_lines = ['# Hyperliquid Fear/Greed Analysis (Notebook Version)', '', '## Insights']
summary_lines += [f'- {x}' for x in insights]
summary_lines += ['', '## Strategy Ideas']
summary_lines += [f'{i}. {x}' for i, x in enumerate(strategy_rules, 1)]
summary_path.write_text('\n'.join(summary_lines), encoding='utf-8')
print(f"\nSaved summary to {summary_path}")


## Reproducibility Notes

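- Place `historical_data.csv` and `fear_greed_index.csv` in the notebook's working directory, then run cells top-to-bottom from a fresh kernel.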
- Output artifacts are written under `outputs/`.
- If matplotlib cache issues appear in restricted environments, run the notebook with these environment variables set:
  - `MPLBACKEND=Agg`
  - `MPLCONFIGDIR=/tmp/mpl`
  - `XDG_CACHE_HOME=/tmp`