In [64]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import os

sns.set(style='whitegrid')

In [65]:
trades = pd.read_csv('/content/historical_data.csv', low_memory=False)
fg = pd.read_csv('/content/fear_greed_index.csv', low_memory=False)

print("Trades shape:", trades.shape)
print("Fear-Greed shape:", fg.shape)

Trades shape: (211224, 16)
Fear-Greed shape: (2644, 4)


In [66]:

trades.columns = [c.strip().lower().replace(' ', '_') for c in trades.columns]
fg.columns = [c.strip().lower() for c in fg.columns]


In [68]:

trades.rename(columns={
    'coin':'symbol',
    'size_tokens':'size_tokens',
    'size_usd':'size_usd',
    'timestamp_ist':'time',
    'closed_pnl':'closedpnl'
}, inplace=True)
fg.rename(columns={'classification':'sentiment', 'value':'fg_value'}, inplace=True)


In [69]:
trades['time'] = pd.to_datetime(trades['time'], format='%d-%m-%Y %H:%M', errors='coerce')
trades['date'] = trades['time'].dt.date

fg['date'] = pd.to_datetime(fg['date'], errors='coerce').dt.date

fg['sentiment'] = fg['sentiment'].replace({'Extreme Fear':'Fear','Extreme Greed':'Greed'})
fg['sentiment'] = fg['sentiment'].str.title()

In [70]:
trades = trades.dropna(subset=['account','symbol','execution_price','closedpnl','time'])
fg = fg.dropna(subset=['date','sentiment'])

In [71]:
os.makedirs('/content/ds_myassignment/csv_files', exist_ok=True)
os.makedirs('/content/ds_myassignment/outputs', exist_ok=True)
outputs_dir = '/content/ds_myassignment/outputs'


In [72]:
trades.to_csv('/content/ds_myassignment/csv_files/trades_cleaned.csv', index=False)
fg.to_csv('/content/ds_myassignment/csv_files/fear_greed_cleaned.csv', index=False)
print("Cleaned files saved!")


Cleaned files saved!


In [33]:
fg['date'] = fg['date'].dt.date

merged = trades.merge(fg[['date','sentiment']], on='date', how='left')

print("Merged shape:", merged.shape)
merged.head()

Merged shape: (211224, 18)


Unnamed: 0,account,symbol,execution_price,size_tokens,size_usd,side,time,start_position,direction,closedpnl,transaction_hash,order_id,crossed,fee,trade_id,timestamp,date,sentiment
0,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9769,986.87,7872.16,BUY,2024-12-02 22:50:00,0.0,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.345404,895000000000000.0,1730000000000.0,2024-12-02,Greed
1,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.98,16.0,127.68,BUY,2024-12-02 22:50:00,986.524596,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.0056,443000000000000.0,1730000000000.0,2024-12-02,Greed
2,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9855,144.09,1150.63,BUY,2024-12-02 22:50:00,1002.518996,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050431,660000000000000.0,1730000000000.0,2024-12-02,Greed
3,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9874,142.98,1142.04,BUY,2024-12-02 22:50:00,1146.558564,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.050043,1080000000000000.0,1730000000000.0,2024-12-02,Greed
4,0xae5eacaf9c6b9111fd53034a602c192a04e082ed,@107,7.9894,8.73,69.75,BUY,2024-12-02 22:50:00,1289.488521,Buy,0.0,0xec09451986a1874e3a980418412fcd0201f500c95bac...,52017706630,True,0.003055,1050000000000000.0,1730000000000.0,2024-12-02,Greed


In [73]:
merged = trades.merge(fg[['date','sentiment']], on='date', how='left')
merged = merged.dropna(subset=['sentiment'])  # Ensure no missing sentiments

In [75]:
daily_metrics = merged.groupby(['date','sentiment']).agg(
    total_trades=('account','count'),
    total_volume_usd=('size_usd','sum'),
    total_pnl=('closedpnl','sum'),
    avg_pnl=('closedpnl','mean')
).reset_index()

print("Daily metrics shape:", daily_metrics.shape)
display(daily_metrics.head())


Daily metrics shape: (479, 6)


Unnamed: 0,date,sentiment,total_trades,total_volume_usd,total_pnl,avg_pnl
0,2023-05-01,Greed,3,477.0,0.0,0.0
1,2023-12-05,Greed,9,50005.83,0.0,0.0
2,2023-12-14,Greed,11,113203.35,-205.434737,-18.675885
3,2023-12-15,Greed,2,10609.95,-24.632034,-12.316017
4,2023-12-16,Greed,3,15348.77,0.0,0.0


In [76]:
daily_metrics['sentiment_num'] = daily_metrics['sentiment'].map({'Fear':0,'Greed':1})

In [78]:
# Total PnL vs Sentiment
plt.figure(figsize=(8,5))
sns.boxplot(x='sentiment', y='total_pnl', data=daily_metrics, color='skyblue')
plt.title('Total Daily PnL by Market Sentiment')
plt.savefig(os.path.join(outputs_dir, 'total_pnl_vs_sentiment.png'))
plt.close()

# Average PnL vs Sentiment
plt.figure(figsize=(8,5))
sns.boxplot(x='sentiment', y='avg_pnl', data=daily_metrics, color='skyblue')
plt.title('Average Daily PnL by Market Sentiment')
plt.savefig(os.path.join(outputs_dir, 'avg_pnl_vs_sentiment.png'))
plt.close()

# Total Trades vs Sentiment
plt.figure(figsize=(8,5))
sns.boxplot(x='sentiment', y='total_trades', data=daily_metrics, color='skyblue')
plt.title('Daily Trade Count by Market Sentiment')
plt.savefig(os.path.join(outputs_dir, 'total_trades_vs_sentiment.png'))
plt.close()

# Correlation Matrix
corr = daily_metrics[['sentiment_num','total_trades','total_volume_usd','total_pnl','avg_pnl']].corr()
plt.figure(figsize=(6,5))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.savefig(os.path.join(outputs_dir, 'correlation_matrix.png'))
plt.close()

# Pairplot of metrics
sns.pairplot(daily_metrics, hue='sentiment', vars=['total_trades','total_volume_usd','total_pnl','avg_pnl'])
plt.savefig(os.path.join(outputs_dir, 'pairplot_metrics.png'))
plt.close()

# PnL Over Time
plt.figure(figsize=(10,5))
sns.lineplot(x='date', y='total_pnl', hue='sentiment', data=daily_metrics)
plt.title('Daily PnL Over Time by Sentiment')
plt.savefig(os.path.join(outputs_dir, 'pnl_over_time.png'))
plt.close()

print("All plots saved in outputs folder!")


All plots saved in outputs folder!


In [79]:
features = ['sentiment_num','total_trades','total_volume_usd']
X = daily_metrics[features]
y = np.log1p(daily_metrics['total_pnl'].abs())  # log-transform

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

print("R2 Score after log transform:", r2_score(y_test, y_pred))

R2 Score after log transform: 0.46841189766561264


In [80]:
plt.figure(figsize=(6,4))
plt.bar(features, rf.feature_importances_)
plt.title('Feature Importance')
plt.ylabel('Importance')
plt.savefig(os.path.join(outputs_dir, 'feature_importance.png'))
plt.close()
print("feature_importance.png saved!")


feature_importance.png saved!
