In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Create the working folders used by this notebook; existing folders are
# left as they are (exist_ok=True), so re-running the cell is harmless.
for folder in ("csv_files", "outputs"):
    os.makedirs(folder, exist_ok=True)

In [4]:
# Input CSV locations, resolved relative to the current working directory.
sentiment_path, trades_path = "fear_greed_index.csv", "historical_data.csv"

In [5]:
# Load both inputs untouched; the *_raw frames are kept as the pristine
# starting point and are only ever copied by later cells.
sentiment_raw, trades_raw = (
    pd.read_csv(csv_path) for csv_path in (sentiment_path, trades_path)
)

# Show the column names so the schema each later cell relies on is visible.
for label, frame in (("Sentiment", sentiment_raw), ("Trades", trades_raw)):
    print(f"{label} columns:", list(frame.columns))

Sentiment columns: ['timestamp', 'value', 'classification', 'date']
Trades columns: ['Account', 'Coin', 'Execution Price', 'Size Tokens', 'Size USD', 'Side', 'Timestamp IST', 'Start Position', 'Direction', 'Closed PnL', 'Transaction Hash', 'Order ID', 'Crossed', 'Fee', 'Trade ID', 'Timestamp']


In [6]:
# Work on an independent (deep) copy so `sentiment_raw` stays unmodified.
sentiment = sentiment_raw.copy(deep=True)

In [7]:
# Derive `sentiment_date` for every sentiment row.
# Preference order: the 'date' column if present, otherwise the numeric
# 'timestamp' column interpreted as a Unix epoch. Values that cannot be
# parsed become NaT (errors='coerce') instead of raising.
if 'date' in sentiment.columns:
    sentiment['sentiment_date'] = pd.to_datetime(sentiment['date'], errors='coerce')
elif 'timestamp' in sentiment.columns:
    epoch_values = pd.to_numeric(sentiment['timestamp'], errors='coerce')
    # A median above 1e11 is treated as milliseconds; anything else
    # (including an all-NaN column) is treated as seconds.
    epoch_unit = 'ms' if epoch_values.dropna().median() > 1e11 else 's'
    sentiment['sentiment_date'] = pd.to_datetime(epoch_values, unit=epoch_unit, errors='coerce')
else:
    raise ValueError("Sentiment CSV must include either 'date' or 'timestamp'.")

In [8]:
# Collapse the textual sentiment classes into two buckets and a numeric flag.
# Labels are compared after casting to str, trimming whitespace and
# lower-casing. Any label outside the four listed below passes through
# `replace` unchanged and therefore receives NaN as its `sentiment_flag`.
label_to_bucket = {
    'fear': 'fear',
    'extreme fear': 'fear',
    'greed': 'greed',
    'extreme greed': 'greed',
}
bucket_to_flag = {'fear': 0, 'greed': 1}

tidy_labels = sentiment['classification'].astype(str).str.strip().str.lower()
sentiment['classification_norm'] = tidy_labels.replace(label_to_bucket)
sentiment['sentiment_flag'] = sentiment['classification_norm'].map(bucket_to_flag)

In [9]:
# Keep only the columns needed downstream and give the raw index value a
# self-describing name ('value' -> 'sentiment_value').
sentiment_clean = (
    sentiment
    .loc[:, ['sentiment_date', 'classification_norm', 'sentiment_flag', 'value']]
    .rename(columns={'value': 'sentiment_value'})
)

In [10]:
# Work on an independent (deep) copy so `trades_raw` stays unmodified.
trades = trades_raw.copy(deep=True)


In [11]:
# Build one `trade_time` per trade plus its calendar day (`trade_date`).
#
# Primary source: the 'Timestamp IST' string column. Unparseable values
# become NaT (errors='coerce').
# Fallback: the numeric 'Timestamp' column, read as epoch milliseconds, is
# used only for rows where the primary source produced NaT.
#
# Epoch values convert to UTC-based naive timestamps, whereas the primary
# column is labelled IST. The fallback is therefore shifted by the fixed
# IST offset (UTC+05:30) so both sources share one clock; without the shift
# a filled-in row could be assigned to the wrong `trade_date`.
IST_OFFSET = pd.Timedelta(hours=5, minutes=30)

if 'Timestamp IST' in trades.columns:
    # NOTE(review): the format is inferred by pandas. If the raw strings are
    # day-first (DD-MM-YYYY), pass dayfirst=True or an explicit format=...;
    # confirm against the raw file before changing.
    trade_time = pd.to_datetime(trades['Timestamp IST'], errors='coerce')
else:
    # No string timestamps at all: start from an all-NaT series so the
    # fallback below can fill every row.
    trade_time = pd.Series(pd.NaT, index=trades.index)

if 'Timestamp' in trades.columns:
    ts_num = pd.to_numeric(trades['Timestamp'], errors='coerce')
    trade_time_epoch = pd.to_datetime(ts_num, unit='ms', errors='coerce') + IST_OFFSET
    trade_time = trade_time.fillna(trade_time_epoch)

trades['trade_time'] = trade_time
# normalize() resets the time-of-day to midnight, leaving a per-day join key.
trades['trade_date'] = trades['trade_time'].dt.normalize()

In [12]:
# Force the measurement columns to numeric dtype. Columns absent from the
# file are skipped; values that cannot be parsed become NaN.
numeric_candidates = (
    'Execution Price', 'Size Tokens', 'Size USD',
    'Closed PnL', 'Fee', 'Start Position',
)
for col in (name for name in numeric_candidates if name in trades.columns):
    trades[col] = pd.to_numeric(trades[col], errors='coerce')

In [13]:
# Normalise the trade side and express it as a signed direction:
# BUY -> +1, SELL -> -1, any other value -> 0.
if 'Side' not in trades.columns:
    # No side information available: placeholder text column, neutral sign.
    trades['side_raw'] = np.nan
    trades['side_sign'] = 0
else:
    side_text = trades['Side'].astype(str).str.upper().str.strip()
    trades['side_raw'] = side_text
    trades['side_sign'] = side_text.map({'BUY': 1, 'SELL': -1}).fillna(0)

In [14]:
# Turn the 'Crossed' column into a 1/0 flag. The comparison is done on the
# lower-cased string form, so anything other than 'true'/'false' maps to NaN.
if 'Crossed' not in trades.columns:
    trades['crossed_flag'] = np.nan
else:
    crossed_text = trades['Crossed'].astype(str).str.lower()
    trades['crossed_flag'] = crossed_text.map({'true': 1, 'false': 0})

In [15]:
# USD notional of each trade.
# Start from 'Size USD' when the column exists; otherwise start from NaN
# (a float column, rather than the object column of None that
# DataFrame.get would yield for a missing key).
if 'Size USD' in trades.columns:
    trades['notional_usd'] = trades['Size USD']
else:
    trades['notional_usd'] = np.nan

# Fill gaps row by row with price * token size. Rows that already have a
# value are left untouched, so a fully populated 'Size USD' is unchanged,
# while partially missing data no longer stays NaN just because some other
# row happened to have a value.
if ('Execution Price' in trades.columns) and ('Size Tokens' in trades.columns):
    trades['notional_usd'] = trades['notional_usd'].fillna(
        trades['Execution Price'] * trades['Size Tokens']
    )

In [16]:
# Realised PnL per trade and a win indicator (1 when pnl > 0, else 0).
if 'Closed PnL' in trades.columns:
    trades['pnl'] = trades['Closed PnL']
    # A missing PnL is "unknown", not "not profitable": keep it as NaN so it
    # does not count as a loss in later averages. This matches the else
    # branch, which also uses NaN when no PnL is available.
    trades['profit_flag'] = (
        (trades['pnl'] > 0).astype(int).where(trades['pnl'].notna())
    )
else:
    trades['pnl'] = np.nan
    trades['profit_flag'] = np.nan

In [17]:
# Attach the daily sentiment reading to every trade made on that day.
sentiment_clean['sentiment_day'] = sentiment_clean['sentiment_date'].dt.normalize()

# One row per valid day on the sentiment side:
#  * rows whose date failed to parse (NaT) are dropped, because pandas merge
#    matches null keys to each other and would otherwise join them onto
#    every trade whose own timestamp failed to parse;
#  * if a day appears more than once, only its first row is kept so a trade
#    is never duplicated by the join.
sentiment_daily = (
    sentiment_clean[['sentiment_day', 'classification_norm', 'sentiment_flag', 'sentiment_value']]
    .dropna(subset=['sentiment_day'])
    .drop_duplicates(subset='sentiment_day', keep='first')
)

# Inner join: trades without a sentiment reading for their day are excluded.
# validate='many_to_one' guards the "one sentiment row per day" invariant.
merged = pd.merge(
    trades,
    sentiment_daily,
    left_on='trade_date', right_on='sentiment_day', how='inner',
    validate='many_to_one',
)

print("Merged shape:", merged.shape)
print(merged[['trade_time','Coin','Side','pnl','notional_usd','classification_norm','sentiment_flag']].head())

Merged shape: (142132, 28)
           trade_time  Coin Side  pnl  notional_usd classification_norm  \
0 2024-02-12 22:50:00  @107  BUY  0.0       7872.16               greed   
1 2024-02-12 22:50:00  @107  BUY  0.0        127.68               greed   
2 2024-02-12 22:50:00  @107  BUY  0.0       1150.63               greed   
3 2024-02-12 22:50:00  @107  BUY  0.0       1142.04               greed   
4 2024-02-12 22:50:00  @107  BUY  0.0         69.75               greed   

   sentiment_flag  
0             1.0  
1             1.0  
2             1.0  
3             1.0  
4             1.0  
