In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
# Working directory is the folder that holds the sentiment CSV.
BASE_DIR = os.path.dirname("/content/fear_greed_index.csv")
# Derived CSVs go to csv_files/, figures go to outputs/.
DATA_DIR, OUTPUT_DIR = (
    os.path.join(BASE_DIR, subfolder) for subfolder in ("csv_files", "outputs")
)
for folder in (DATA_DIR, OUTPUT_DIR):
    # exist_ok=True keeps re-running this cell harmless.
    os.makedirs(folder, exist_ok=True)
plt.style.use('seaborn-v0_8')

In [9]:
# Both input CSVs are read from the fixed "/content" directory.
fg_path = os.path.join("/content", "fear_greed_index.csv")
trades_path = os.path.join("/content", "historical_data.csv")
fear_greed = pd.read_csv(fg_path)
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the recorded output of this cell echoes this read_csv line the
# way a Python warning does -- presumably pandas' DtypeWarning for mixed-type
# columns; confirm against a fresh run.
historical = pd.read_csv(trades_path, low_memory=False)
# Fail fast when the sentiment file lacks any column used later in this cell.
required_fg = {"date", "classification", "value"}
missing_fg = required_fg - set(fear_greed.columns)
if missing_fg:
    raise KeyError(f"Fear/Greed CSV missing required columns: {missing_fg}")
# Normalised header (lower-cased, then stripped) -> original header, so trade
# columns can be looked up regardless of case or surrounding whitespace.
lower = {c.lower().strip(): c for c in historical.columns}
def resolve(candidates):
    """Return the original header of the first candidate found in ``lower``.

    ``candidates`` is an iterable of normalised header names, tried in order.
    Returns ``None`` when none of them is a key of ``lower``.
    """
    return next((lower[name] for name in candidates if name in lower), None)
# Map each logical field to the actual header in the trades CSV; the first
# matching alias wins.
time_col = resolve(["time","timestamp","timestamp ist"])
pnl_col = resolve(["closedpnl","closed pnl","pnl","closed_pnl"])
side_col = resolve(["side"])
size_col = resolve(["size usd","size tokens","size"])
# Leverage is optional: later steps skip it when no such column exists.
lev_col = resolve(["leverage"])
# Report exactly which logical fields could not be resolved, mirroring the
# Fear/Greed check that also lists its missing columns.
required_trade_cols = {"time": time_col, "pnl": pnl_col, "side": side_col, "size": size_col}
missing_trade_cols = sorted(
    field for field, column in required_trade_cols.items() if column is None
)
if missing_trade_cols:
    raise KeyError(
        f"Historical CSV missing required trading columns: {missing_trade_cols}"
    )
# Sentiment dates must all parse; errors="raise" aborts on any malformed value.
fear_greed["date"] = pd.to_datetime(fear_greed["date"], errors="raise")


def _infer_epoch_unit(median_value):
    """Guess the epoch unit of a numeric timestamp from its magnitude.

    Returns "ns", "us", "ms" or "s" for the first threshold the value exceeds
    (1e16, 1e14, 1e11, 1e8 respectively), or None when the value is not finite
    or does not exceed 1e8.
    """
    if not np.isfinite(median_value):
        return None
    for threshold, unit in ((1e16, "ns"), (1e14, "us"), (1e11, "ms"), (1e8, "s")):
        if median_value > threshold:
            return unit
    return None


def _parse_trade_times(raw):
    """Convert a raw timestamp column to datetimes.

    Numeric columns are read as epoch values in the unit inferred from their
    median. Non-numeric columns are read the same way when more than 80% of
    their entries convert to numbers. In every other case the original values
    go through ``pd.to_datetime(..., errors="coerce")``.
    """
    if pd.api.types.is_numeric_dtype(raw):
        median_value = np.nanmedian(raw.astype(float)) if len(raw) else np.nan
        unit = _infer_epoch_unit(median_value)
        if unit:
            # No errors="coerce" here: an unconvertible epoch value raises.
            return pd.to_datetime(raw, unit=unit)
        return pd.to_datetime(raw, errors="coerce")

    as_number = pd.to_numeric(raw, errors="coerce")
    if as_number.notna().mean() > 0.8:
        unit = _infer_epoch_unit(np.nanmedian(as_number))
        if unit:
            # Entries that failed numeric conversion are NaN and become NaT.
            return pd.to_datetime(as_number, unit=unit)
    return pd.to_datetime(raw, errors="coerce")


historical[time_col] = _parse_trade_times(historical[time_col])
if historical[time_col].isna().all():
    raise ValueError(f"Could not parse timestamps in column '{time_col}'")
# Midnight-normalised timestamp; used as the daily grouping key.
historical["date"] = historical[time_col].dt.normalize()
# Force the metric columns to numeric; unparseable entries become NaN.
for numeric_col in (pnl_col, size_col):
    historical[numeric_col] = pd.to_numeric(historical[numeric_col], errors="coerce")
if lev_col is not None:
    historical[lev_col] = pd.to_numeric(historical[lev_col], errors="coerce")
# NaN > 0 is False, so rows with missing PnL count as non-wins (0).
historical["is_win"] = (historical[pnl_col] > 0).astype(int)
historical["abs_size"] = historical[size_col].abs()


def _pnl_p10(values):
    """Return the 10th percentile of a group's PnL values."""
    return values.quantile(0.10)


# Named aggregation sets the output column names directly, so nothing depends
# on pandas' auto-generated "<lambda_0>" label or on flattening a MultiIndex.
agg = {
    "daily_pnl_sum": (pnl_col, "sum"),
    "daily_pnl_mean": (pnl_col, "mean"),
    "daily_pnl_std": (pnl_col, "std"),
    "daily_pnl_p10": (pnl_col, _pnl_p10),
    "total_volume": ("abs_size", "sum"),
    "avg_trade_size": ("abs_size", "mean"),
    "win_rate": ("is_win", "mean"),
}
if lev_col is not None:
    agg["median_leverage"] = (lev_col, "median")
    agg["avg_leverage"] = (lev_col, "mean")
# One row per calendar day, indexed by "date".
daily_grp = historical.groupby("date").agg(**agg)
# Number of trades per (date, side).
side_counts = historical.groupby(["date", side_col]).size()
# Share of each side within its day. transform("sum") returns a Series aligned
# to side_counts, so the (date, side) index is kept as is. The earlier
# groupby(level=0).apply(...) form prepends the group key on pandas >= 2.0,
# which duplicated the "date" level and produced a MultiIndex after the join.
daily_side = (
    side_counts.div(side_counts.groupby(level=0).transform("sum"))
    .unstack(fill_value=0)
    .add_prefix("side_share_")
)
# Daily metrics + side shares + sentiment, left-joined on the date index so
# every trading day is kept even when no sentiment row matches.
sentiment_by_date = fear_greed[["date","classification","value"]].set_index("date")
joined = (
    daily_grp.join(daily_side, how="left")
    .merge(sentiment_by_date, left_index=True, right_index=True, how="left")
    .sort_index()
)
joined_reset = joined.reset_index()
joined_reset.to_csv(os.path.join(DATA_DIR, "daily_metrics_with_sentiment.csv"), index=False)
aligned = joined.copy()


def _zscore(values):
    """Standardise a Series with the population standard deviation (ddof=0).

    Returns an all-0.0 Series when the standard deviation is exactly zero.
    A NaN standard deviation is not equal to zero, so the division is still
    performed and the result is all NaN.
    """
    spread = values.std(ddof=0)
    if spread != 0:
        return (values - values.mean()) / spread
    return pd.Series(0.0, index=values.index)


aligned["pnl_z"] = _zscore(aligned["daily_pnl_sum"])
aligned["sentiment_z"] = _zscore(aligned["value"])
# Positive when PnL and sentiment sit on the same side of their means.
aligned["alignment_score"] = aligned["pnl_z"] * aligned["sentiment_z"]
# Sign of the day-over-day change; rows with no defined change (the first row
# or a missing value) get 0.
aligned["pnl_direction"] = np.sign(aligned["daily_pnl_sum"].diff()).fillna(0)
aligned["sentiment_direction"] = np.sign(aligned["value"].diff()).fillna(0)
# +1 when both series moved the same way, else -1. Two zeros compare equal, so
# rows where both directions are 0 also get +1.
aligned["directional_alignment"] = np.where(
    aligned["pnl_direction"] == aligned["sentiment_direction"], 1, -1
)
# A NaN score matches neither condition and is therefore labelled "Neutral".
aligned["alignment_flag"] = np.select(
    [aligned["alignment_score"] > 0, aligned["alignment_score"] < 0],
    ["Aligned", "Divergent"],
    default="Neutral",
)
aligned["net_alignment"] = aligned["alignment_score"] * aligned["directional_alignment"]
# Ordinal encoding of the sentiment label; labels not listed here map to NaN.
classification_map = {
    "Extreme Fear": -2,
    "Fear": -1,
    "Neutral": 0,
    "Greed": 1,
    "Extreme Greed": 2
}
aligned["classification_score"] = aligned["classification"].map(classification_map)
aligned_reset = aligned.reset_index()
aligned_reset.to_csv(os.path.join(DATA_DIR, "daily_alignment_metrics.csv"), index=False)
# Mean alignment, PnL and sentiment per sentiment class. dropna=False keeps a
# group for days whose classification is missing.
by_class = aligned.groupby("classification", dropna=False)
alignment_summary = by_class.agg(
    avg_alignment=("alignment_score", "mean"),
    avg_net_alignment=("net_alignment", "mean"),
    avg_daily_pnl=("daily_pnl_sum", "mean"),
    avg_sentiment_value=("value", "mean"),
)
summary_path = os.path.join(DATA_DIR, "alignment_summary_by_class.csv")
alignment_summary.to_csv(summary_path)
def _finish_figure(title, filename):
    """Title the current figure, save it under OUTPUT_DIR at 150 dpi and close it."""
    plt.title(title)
    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, filename), dpi=150)
    plt.close()


# 1) Scatter of standardised sentiment against standardised PnL, with zero
#    lines on both axes.
sns.scatterplot(data=aligned, x="sentiment_z", y="pnl_z", hue="classification")
for draw_zero_line in (plt.axhline, plt.axvline):
    draw_zero_line(0, color="gray", linewidth=1)
_finish_figure("Sentiment vs PnL (Z-scores)", "scatter_sentiment_vs_pnl_z.png")

# 2) Mean daily PnL per alignment flag, in a fixed bar order.
flag_order = ["Aligned","Neutral","Divergent"]
sns.barplot(
    data=aligned_reset,
    x="alignment_flag",
    y="daily_pnl_sum",
    estimator=np.mean,
    order=flag_order,
)
_finish_figure("Average Daily PnL by Alignment Flag", "bar_alignment_flag_pnl.png")

# 3) Mean alignment score for every (sentiment class, alignment flag) pair.
heat = aligned.pivot_table(
    index="classification",
    columns="alignment_flag",
    values="alignment_score",
    aggfunc="mean",
)
sns.heatmap(
    heat,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    cbar_kws={"label":"Mean Alignment Score"},
)
_finish_figure("Alignment Score by Sentiment Class and Flag", "heatmap_alignment_by_class.png")

# Status lines for the artefacts written by this cell.
for status_line in (
    "daily_metrics_with_sentiment.csv saved",
    "daily_alignment_metrics.csv saved",
    "alignment_summary_by_class.csv saved",
    "plots saved in outputs/",
):
    print(status_line)

  historical = pd.read_csv(trades_path)


daily_metrics_with_sentiment.csv saved
daily_alignment_metrics.csv saved
alignment_summary_by_class.csv saved
plots saved in outputs/
