<a href="https://colab.research.google.com/github/aishwaryasbansode/ds_Aishwarya/blob/main/notebook_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ===============================
# 📌 FULL PROGRAM FOR ASSIGNMENT
# ===============================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files
import io

sns.set(style="whitegrid")

# --------------------------------------
# STEP 1 — UPLOAD BOTH CSV FILES
# --------------------------------------


def _upload_csv(prompt):
    """Prompt for a Colab upload and parse the first uploaded file as CSV.

    Args:
        prompt: Message printed before the upload widget is shown.

    Returns:
        A ``(uploaded, filename, frame)`` tuple: the mapping returned by
        ``files.upload()``, the name of the first file in that mapping, and
        that file's bytes parsed with ``pd.read_csv``.

    Raises:
        ValueError: If the upload finished without providing any file.
    """
    print(prompt)
    uploaded = files.upload()
    # A cancelled/empty upload would otherwise surface as a bare IndexError
    # on the "first file" lookup, which says nothing about the real cause.
    if not uploaded:
        raise ValueError(f"No file was uploaded after the prompt: {prompt!r}")
    filename = next(iter(uploaded))
    frame = pd.read_csv(io.BytesIO(uploaded[filename]))
    return uploaded, filename, frame


# The intermediate names (up1/tfile, up2/sfile) are kept so that any later
# cell that inspects the raw uploads keeps working.
up1, tfile, df_trader = _upload_csv("Upload hyperliquid_trades.csv")
up2, sfile, df_sent = _upload_csv("Upload fear_greed_index.csv")

# --------------------------------------
# STEP 2 — CHECK COLUMN NAMES
# (Prevents errors later)
# --------------------------------------
# Show the header of each uploaded frame so that a mismatch with the column
# names expected by the following steps is visible up front.
for _label, _frame in (("Trader", df_trader), ("Sentiment", df_sent)):
    print(f"\n{_label} Columns:", _frame.columns.tolist())

# --------------------------------------
# STEP 3 — CLEAN & PREPARE DATA
# --------------------------------------

# Upper bounds used to guess the unit of a numeric epoch column. Each bound
# is roughly where the next finer unit would still point at early 1973, so
# any epoch for a date between 1973 and ~5138 falls into exactly one bucket.
_EPOCH_UNIT_BOUNDS = ((1e11, 's'), (1e14, 'ms'), (1e17, 'us'))


def _to_calendar_dates(values):
    """Convert a column of timestamps to ``datetime.date`` objects.

    Text (and already-datetime) columns are parsed by ``pd.to_datetime``
    as-is. Numeric columns are treated as Unix epochs whose unit is inferred
    from the median magnitude; without this, ``pd.to_datetime`` assumes
    nanoseconds and second/millisecond epochs all collapse to 1970, so no
    trade would ever match a sentiment date.
    NOTE(review): the unit inference is a magnitude heuristic — confirm it
    against the actual export format of the uploaded files.

    Args:
        values: A pandas Series of timestamps.

    Returns:
        A Series of ``datetime.date`` values; unparseable entries are NaT.
    """
    unit = None
    if pd.api.types.is_numeric_dtype(values):
        unit = 'ns'  # pandas' own default for bare numbers
        typical = values.abs().median()  # NaN when empty/all-null -> stays 'ns'
        for bound, candidate in _EPOCH_UNIT_BOUNDS:
            if typical < bound:
                unit = candidate
                break
    return pd.to_datetime(values, unit=unit, errors='coerce').dt.date


# Convert sentiment dates
if 'Date' in df_sent.columns:
    df_sent['Date'] = _to_calendar_dates(df_sent['Date'])
elif 'date' in df_sent.columns:
    df_sent['Date'] = _to_calendar_dates(df_sent['date'])
else:
    raise ValueError("Sentiment file must contain a 'Date' or 'date' column.")

# Clean classification
if 'Classification' in df_sent.columns:
    _raw_labels = df_sent['Classification']
elif 'classification' in df_sent.columns:
    _raw_labels = df_sent['classification']
else:
    raise ValueError("Sentiment file must contain 'Classification' column.")

# astype(str) turns a missing label into the text "nan", which title-casing
# would make a bogus "Nan" sentiment category; mask those rows back to null
# so group-bys ignore them.
df_sent['Classification'] = (
    _raw_labels.astype(str).str.strip().str.title().where(_raw_labels.notna())
)

# Convert trader time → Date
if 'time' in df_trader.columns:
    df_trader['Date'] = _to_calendar_dates(df_trader['time'])
elif 'timestamp' in df_trader.columns:
    df_trader['Date'] = _to_calendar_dates(df_trader['timestamp'])
else:
    raise ValueError("Trades file must contain 'time' or 'timestamp' column.")

# --------------------------------------
# STEP 4 — MERGE DATASETS
# --------------------------------------

# Build a one-row-per-day lookup before joining:
#  * rows whose date failed to parse are dropped, because pandas matches
#    null join keys against each other and would attach their label to every
#    trade whose own timestamp failed to parse;
#  * repeated dates are collapsed (first occurrence wins), because each
#    duplicate would otherwise duplicate every trade of that day and inflate
#    the sums and means computed later.
_sentiment_by_date = df_sent[['Date', 'Classification']].dropna(subset=['Date'])
_n_before = len(_sentiment_by_date)
_sentiment_by_date = _sentiment_by_date.drop_duplicates(subset='Date', keep='first')
if len(_sentiment_by_date) < _n_before:
    print(f"\nDropped {_n_before - len(_sentiment_by_date)} duplicate sentiment date row(s).")

# Left join keeps every trade; trades on days without a sentiment reading
# get a null Classification.
df_merged = df_trader.merge(_sentiment_by_date, on='Date', how='left')

print("\nMerged Data Preview:")
# A bare `df_merged.head()` only renders when it is the last expression of a
# notebook cell, so the preview has to be printed explicitly here.
print(df_merged.head())

# --------------------------------------
# STEP 5 — SUMMARY METRICS
# --------------------------------------

# Handle missing numeric columns: warn about each one, then skip its summary
# and plot instead of failing with a KeyError on the first lookup.
required_numeric = ['closedPnL', 'size', 'leverage']
for col in required_numeric:
    if col not in df_merged.columns:
        print(f"⚠️ Warning: Column '{col}' not found in trader data!")


def _summarize_by_sentiment(frame, column, how, heading):
    """Aggregate one trade column per sentiment class and print the result.

    Args:
        frame: Merged trades frame containing a 'Classification' column.
        column: Name of the numeric column to aggregate.
        how: Aggregation name understood by pandas (e.g. 'mean', 'sum').
        heading: Line printed above the summary table.

    Returns:
        A frame with columns 'Classification' and ``column``, or ``None``
        when ``column`` is absent from ``frame``.
    """
    if column not in frame.columns:
        return None
    summary = frame.groupby('Classification')[column].agg(how).reset_index()
    print(heading)
    print(summary)
    return summary


def _plot_summary(summary, column, title):
    """Draw a bar chart of ``column`` per sentiment class.

    Does nothing when ``summary`` is ``None`` (metric column was missing).
    """
    if summary is None:
        return
    plt.figure(figsize=(6, 4))
    sns.barplot(data=summary, x='Classification', y=column)
    plt.title(title)
    plt.show()


# Profitability
pnl_summary = _summarize_by_sentiment(
    df_merged, 'closedPnL', 'mean', "\nAverage PnL by Sentiment:"
)

# Volume
volume_summary = _summarize_by_sentiment(
    df_merged, 'size', 'sum', "\nTotal Volume by Sentiment:"
)

# Leverage
leverage_summary = _summarize_by_sentiment(
    df_merged, 'leverage', 'mean', "\nAverage Leverage by Sentiment:"
)

# --------------------------------------
# STEP 6 — VISUALIZATIONS
# --------------------------------------

# Profitability plot
_plot_summary(pnl_summary, 'closedPnL', "Average Profitability by Market Sentiment")

# Volume plot
_plot_summary(volume_summary, 'size', "Total Trade Volume by Sentiment")

# Leverage plot
_plot_summary(leverage_summary, 'leverage', "Average Leverage by Market Sentiment")

# --------------------------------------
# STEP 7 — EXTRA INSIGHT (Loss Rate)
# --------------------------------------

if 'closedPnL' in df_merged.columns:
    # Flag losing trades. A missing PnL compares as False here, so the flag
    # alone cannot tell "not a loss" from "unknown".
    df_merged['isLoss'] = df_merged['closedPnL'] < 0

    # Compute the rate only over trades with a recorded PnL; otherwise every
    # missing value would count as a non-loss and understate the loss rate.
    _known_pnl = df_merged[df_merged['closedPnL'].notna()]
    loss_rate = _known_pnl.groupby('Classification')['isLoss'].mean().reset_index()
    print("\nLoss Rate by Sentiment:")
    print(loss_rate)

    plt.figure(figsize=(6, 4))
    sns.barplot(data=loss_rate, x='Classification', y='isLoss')
    plt.title("Loss Rate by Market Sentiment")
    plt.show()
else:
    # Without the PnL column there is nothing to classify as a loss.
    print("\n⚠️ Warning: Column 'closedPnL' not found; skipping loss-rate analysis.")

print("\n🎉 Analysis Complete!")


Upload hyperliquid_trades.csv


KeyboardInterrupt: 