In [None]:
# === DS Assignment: Trader Behavior vs Market Sentiment ===
import os, math, json
from datetime import datetime
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# -------------------------------
# Folder setup
# -------------------------------
# ROOT is resolved relative to the current working directory; the two
# sub-folders below hold CSV files and generated outputs respectively.
ROOT = "ds_Abhishek_Rawal"  # your folder name
CSV_DIR, OUT_DIR = (os.path.join(ROOT, sub) for sub in ("csv_files", "outputs"))

# Create both folders up front; exist_ok=True makes re-runs harmless.
for _directory in (CSV_DIR, OUT_DIR):
    os.makedirs(_directory, exist_ok=True)

# -------------------------------
# Input / output paths (update if needed)
# -------------------------------
# The two input CSVs are looked up inside CSV_DIR so that the project folder
# is self-contained and the script does not depend on one machine's absolute
# Windows path (the previous hard-coded paths pointed at a placeholder
# "ds_YourName" folder that does not match ROOT). If the files live somewhere
# else, edit these two assignments.
TRADER_PATH = os.path.join(CSV_DIR, "historical_data.csv")
SENTI_PATH  = os.path.join(CSV_DIR, "fear_greed_index.csv")

# Outputs: the merged dataset goes next to the inputs, the report into ROOT.
MERGED_CSV_PATH = os.path.join(CSV_DIR, "merged_sentiment_trades.csv")
PDF_PATH        = os.path.join(ROOT, "ds_report.pdf")

# -------------------------------
# Load CSVs
# -------------------------------
# Trades first, then the sentiment index; a missing file surfaces as
# FileNotFoundError from pandas.
trader_df = pd.read_csv(filepath_or_buffer=TRADER_PATH)
senti_df = pd.read_csv(filepath_or_buffer=SENTI_PATH)

# -------------------------------
# Parse trade time
# -------------------------------
# Fill trader_df["trade_dt"] from the first timestamp column that exists,
# checked in this order: "Timestamp IST", "Timestamp", "time". Values that
# cannot be parsed become NaT instead of raising.
# NOTE(review): "Timestamp IST" is parsed as naive datetimes; whether the
# sentiment dates use the same timezone cannot be told from here -- confirm.
if "Timestamp IST" in trader_df.columns:
    # dayfirst=True: the day precedes the month in these strings.
    trader_df["trade_dt"] = pd.to_datetime(trader_df["Timestamp IST"], dayfirst=True, errors="coerce")
elif "Timestamp" in trader_df.columns:
    def parse_epoch_ms(x):
        """Convert one epoch value (seconds or milliseconds) to a Timestamp.

        A value whose magnitude has at most 10 digits is treated as seconds
        and scaled to milliseconds; anything larger is taken as milliseconds.
        Returns ``pd.NaT`` when ``x`` is not a finite number or the result is
        outside the representable datetime range.
        """
        try:
            val = int(float(x))
            # Compare the magnitude, not the string length, so a leading
            # minus sign is not counted as a digit.
            if abs(val) < 10**10:  # seconds -> ms
                val *= 1000
            return pd.to_datetime(val, unit="ms")
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN/inf, or out-of-bounds input.
            return pd.NaT
    # The outer to_datetime guarantees a datetime64 column even when the
    # input is empty, so the .dt accessor below always works.
    trader_df["trade_dt"] = pd.to_datetime(
        trader_df["Timestamp"].apply(parse_epoch_ms), errors="coerce"
    )
elif "time" in trader_df.columns:
    trader_df["trade_dt"] = pd.to_datetime(trader_df["time"], errors="coerce")
else:
    # No known timestamp column: every trade gets a missing date.
    trader_df["trade_dt"] = pd.NaT

# Calendar date of each trade; used as the join key against sentiment.
trader_df["trade_date"] = trader_df["trade_dt"].dt.date

# -------------------------------
# Convert numeric columns safely
# -------------------------------
# Columns that may hold numbers stored as text; values that cannot be
# converted become NaN instead of raising. Absent columns are skipped.
_NUMERIC_COLUMNS = ("Execution Price", "Size Tokens", "Size USD", "Closed PnL", "Fee")
for _numeric_name in _NUMERIC_COLUMNS:
    if _numeric_name not in trader_df.columns:
        continue
    trader_df[_numeric_name] = pd.to_numeric(trader_df[_numeric_name], errors="coerce")

# -------------------------------
# Side normalization
# -------------------------------
# Build trader_df["side_norm"]: the upper-cased, whitespace-stripped trade
# side, taken from "Side" if present, otherwise from "Direction".
_side_source = next(
    (name for name in ("Side", "Direction") if name in trader_df.columns), None
)
if _side_source is None:
    trader_df["side_norm"] = np.nan
else:
    _raw_side = trader_df[_side_source]
    # astype(str) would turn a missing value into the literal text "NAN";
    # restore real NaN afterwards so missing sides stay missing rather than
    # appearing as their own category.
    trader_df["side_norm"] = (
        _raw_side.astype(str).str.upper().str.strip().where(_raw_side.notna(), np.nan)
    )

# Profit label: 1 when the trade's "Closed PnL" is strictly positive, else 0.
# A missing PnL (or a missing column) counts as 0, i.e. not profitable.
_closed_pnl = trader_df.get("Closed PnL", pd.Series([np.nan] * len(trader_df)))
_is_winner = _closed_pnl.fillna(0) > 0
# Assign a plain array so no index alignment takes place.
trader_df["is_profit"] = _is_winner.astype(int).to_numpy()

# -------------------------------
# Parse sentiment data
# -------------------------------
# Derive senti_df["senti_date"] (calendar date) from the first usable column:
# "date", then "Date", then an epoch-seconds "timestamp". Unparseable values
# become missing; with no usable column every date is missing.
_date_column = next(
    (name for name in ("date", "Date") if name in senti_df.columns), None
)
if _date_column is not None:
    senti_df["senti_date"] = pd.to_datetime(senti_df[_date_column], errors="coerce").dt.date
elif "timestamp" in senti_df.columns:
    senti_df["senti_date"] = pd.to_datetime(senti_df["timestamp"], unit="s", errors="coerce").dt.date
else:
    senti_df["senti_date"] = pd.NaT

def map_sentiment(cls):
    """Collapse a sentiment classification label into a coarse bucket.

    The label is stripped and lower-cased, then matched against the rules
    below in order; the first rule that matches decides the bucket.

    Returns "Fear", "Greed" or "Neutral" for string input (unrecognized
    strings fall back to "Neutral"), and ``np.nan`` for non-string input.
    """
    if not isinstance(cls, str):
        return np.nan

    label = cls.strip().lower()
    # Order matters: the exact/"extreme" rules outrank "neutral", which in
    # turn outranks the loose substring rules.
    ordered_rules = (
        (label == "fear" or "extreme fear" in label, "Fear"),
        (label == "greed" or "extreme greed" in label, "Greed"),
        ("neutral" in label, "Neutral"),
        ("fear" in label, "Fear"),
        ("greed" in label, "Greed"),
    )
    return next((bucket for matched, bucket in ordered_rules if matched), "Neutral")

# Bucket each row's classification label ("classification" preferred over
# "Classification"); with neither column the bucket is missing everywhere.
_class_column = next(
    (name for name in ("classification", "Classification") if name in senti_df.columns),
    None,
)
senti_df["sentiment_bucket"] = (
    np.nan if _class_column is None else senti_df[_class_column].apply(map_sentiment)
)

# Numeric sentiment score comes from the "value" column when it exists;
# non-numeric entries become NaN, and so does the whole column otherwise.
score_col = "value" if "value" in senti_df.columns else None
senti_df["sentiment_score"] = (
    pd.to_numeric(senti_df[score_col], errors="coerce") if score_col else np.nan
)

# Keep only the columns needed for the join, and only rows with a usable date.
_kept_columns = ["senti_date", "sentiment_bucket", "sentiment_score"]
senti_keep = senti_df.loc[senti_df["senti_date"].notna(), _kept_columns]

# -------------------------------
# Merge trader & sentiment
# -------------------------------
# Left-join so every trade is kept exactly once. If the sentiment table has
# several rows for the same date, a plain merge would duplicate the matching
# trades and inflate every count and sum downstream, so only the last row
# per date is used; validate= makes pandas raise if that ever stops holding.
_daily_sentiment = senti_keep.drop_duplicates(subset="senti_date", keep="last")
merged = pd.merge(
    trader_df,
    _daily_sentiment,
    left_on="trade_date",
    right_on="senti_date",
    how="left",
    validate="many_to_one",
)
# Trades with no sentiment row for their date are labelled "Unknown".
merged["sentiment_bucket"] = merged["sentiment_bucket"].fillna("Unknown")
merged.to_csv(MERGED_CSV_PATH, index=False)

# -------------------------------
# Aggregations & summaries
# -------------------------------
# Per-bucket statistics. The plan is restricted to columns that actually
# exist, matching the "if column in merged.columns" guards used by the plots,
# so a dataset without "Closed PnL" or "Size USD" no longer raises KeyError.
_agg_plan = {
    "Closed PnL": ["count", "mean", "median", "std", "sum"],
    "Size USD": ["mean", "median", "std", "sum"],
    "sentiment_score": ["mean"],
}
_agg_plan = {
    column: stats for column, stats in _agg_plan.items() if column in merged.columns
}

_by_sentiment = merged.groupby("sentiment_bucket")
if _agg_plan:
    summary_by_senti = _by_sentiment.agg(_agg_plan).reset_index()
else:
    # Nothing to aggregate: fall back to the number of trades per bucket.
    summary_by_senti = _by_sentiment.size().reset_index(name="trades")

# Win rate = mean of the 0/1 profit label within each bucket.
winrate = _by_sentiment["is_profit"].mean().reset_index().rename(columns={"is_profit": "win_rate"})

# -------------------------------
# Visualizations
# -------------------------------
# 1) Average closed PnL by sentiment (bar chart)
if "Closed PnL" in merged.columns:
    _bucket_order = ["Fear", "Neutral", "Greed", "Unknown"]
    avg_pnl = merged.groupby("sentiment_bucket")["Closed PnL"].mean()
    # Fixed display order; buckets with no data are dropped.
    avg_pnl = avg_pnl.reindex(_bucket_order).dropna()

    fig, ax = plt.subplots()
    avg_pnl.plot(kind="bar", ax=ax)
    ax.set_title("Average Closed PnL by Sentiment")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Average Closed PnL")
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, "avg_pnl_by_sentiment.png"), dpi=150)
    plt.show()

# 2) Box plot: trade size (USD) by sentiment
if "Size USD" in merged.columns:
    _present_buckets = set(merged["sentiment_bucket"].unique())
    # Fixed display order, restricted to buckets that actually occur.
    order = [c for c in ["Fear", "Neutral", "Greed", "Unknown"] if c in _present_buckets]
    data = [merged.loc[merged["sentiment_bucket"] == c, "Size USD"].dropna().values for c in order]
    # Skip the figure when there is nothing to draw (e.g. an empty dataset).
    if order:
        plt.figure()
        plt.boxplot(data, showmeans=True)
        # Label the boxes through the x ticks instead of the boxplot
        # "labels" keyword, which is deprecated in recent Matplotlib.
        # Boxes sit at positions 1..N by default.
        plt.xticks(range(1, len(order) + 1), order)
        plt.title("Trade USD Size Distribution by Sentiment")
        plt.xlabel("Sentiment")
        plt.ylabel("Trade Size (USD)")
        plt.tight_layout()
        plt.savefig(os.path.join(OUT_DIR, "size_usd_by_sentiment.png"), dpi=150)
        plt.show()

# 3) Trade side distribution (stacked bars per sentiment bucket)
if "side_norm" in merged.columns and merged["side_norm"].notna().any():
    # Rows = sentiment bucket, columns = trade side, cells = number of trades.
    counts = merged.groupby(["sentiment_bucket", "side_norm"]).size().unstack(fill_value=0)
    _row_order = [c for c in ["Fear", "Neutral", "Greed", "Unknown"] if c in counts.index]
    counts = counts.reindex(index=_row_order)

    x = np.arange(len(counts))
    fig, ax = plt.subplots()
    # Running height of each stack; every side is drawn on top of the last.
    bottom = np.zeros(len(counts))
    for _side_label, _side_counts in counts.items():
        ax.bar(x, _side_counts.values, bottom=bottom, label=str(_side_label))
        bottom = bottom + _side_counts.values

    ax.set_xticks(x)
    ax.set_xticklabels(counts.index.tolist())
    ax.set_title("Trade Side Counts by Sentiment")
    ax.set_xlabel("Sentiment")
    ax.set_ylabel("Trade Count")
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, "trade_side_distribution.png"), dpi=150)
    plt.show()

# 4) Correlation heatmap over all numeric columns
num_cols = merged.select_dtypes(include=[np.number]).columns.tolist()
if len(num_cols) >= 2:  # a correlation needs at least two columns
    corr = merged[num_cols].corr()
    _tick_positions = range(len(num_cols))

    fig, ax = plt.subplots()
    _heat = ax.imshow(corr, aspect="auto")
    ax.set_xticks(_tick_positions)
    ax.set_xticklabels(num_cols, rotation=90)
    ax.set_yticks(_tick_positions)
    ax.set_yticklabels(num_cols)
    fig.colorbar(_heat, ax=ax)
    ax.set_title("Correlation Heatmap (Numeric Columns)")
    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, "correlation_heatmap.png"), dpi=150)
    plt.show()

# -------------------------------
# PDF report generation
# -------------------------------
# Page 1 is a text cover sheet; every chart PNG that exists in OUT_DIR is
# then appended as its own page.
with PdfPages(PDF_PATH) as pdf:
    # --- cover page ---
    _cover = plt.figure(figsize=(8.5, 11))
    _cover_ax = _cover.add_subplot(111)
    _cover_ax.axis("off")

    lines = [
        "Data Science Report: Trader Behavior vs Market Sentiment",
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        f"Total Trades: {len(merged):,}",
    ]
    if "Closed PnL" in merged.columns:
        # Share of trades with strictly positive closed PnL.
        wr = (merged["Closed PnL"] > 0).mean()
        lines += [f"Overall Win Rate: {wr*100:.2f}%"]

    # Write the lines top-down, stepping the vertical position each time.
    y = 0.95
    for line in lines:
        _cover_ax.text(0.05, y, line, fontsize=12, va="top")
        y -= 0.04
    pdf.savefig(_cover)
    plt.close(_cover)

    # --- one page per saved chart ---
    _chart_files = (
        "avg_pnl_by_sentiment.png",
        "size_usd_by_sentiment.png",
        "trade_side_distribution.png",
        "correlation_heatmap.png",
    )
    for name in _chart_files:
        pth = os.path.join(OUT_DIR, name)
        if not os.path.exists(pth):
            continue  # that chart was not produced
        img = plt.imread(pth)
        _page = plt.figure(figsize=(11, 8.5))
        _page_ax = _page.add_subplot(111)
        _page_ax.imshow(img)
        _page_ax.axis("off")
        _page_ax.set_title(name)
        pdf.savefig(_page)
        plt.close(_page)

# -------------------------------
# Show results in console
# -------------------------------
# Print each summary table under its heading, then a completion message.
for _heading, _table in (
    ("\n===== Summary by Sentiment =====", summary_by_senti),
    ("\n===== Win Rate by Sentiment =====", winrate),
):
    print(_heading)
    print(_table)
print("\nAll outputs and ds_report.pdf generated successfully ‚úÖ")


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\hp\\Desktop\\ds_assignment\\ds_Abhishek_Rawal\\csv_files\\historical_data.csv'