In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from statsmodels.tsa.stattools import grangercausalitytests
import networkx as nx
from scipy.stats import zscore

# Global plot appearance for every figure saved by the classes below.
# NOTE(review): plt.style.use() is applied after sns.set_palette(); the style sheet
# presumably sets its own colour cycle and so may override the "husl" palette --
# confirm, and swap the two calls if the husl colours are the intended ones.
sns.set_palette("husl")
plt.style.use('seaborn-v0_8')


# Destination folders (relative to the working directory): the analysis code below
# saves PNG figures under figures/ and CSV / text results under output/.
os.makedirs('figures', exist_ok=True)
os.makedirs('output', exist_ok=True)


In [None]:
class TraderSentimentAnalyzer:
    """Join per-trade history with the daily Fear & Greed index and summarise it.

    Intended call order: ``load_and_preprocess_data`` -> ``merge_datasets`` ->
    any of ``calculate_trader_metrics`` / ``analyze_sentiment_patterns`` /
    ``create_visualizations`` / ``generate_insights``.

    Results are written to ``output/`` (CSV, text) and ``figures/`` (PNG),
    both relative to the working directory; the folders must already exist.
    """

    def __init__(self, historical_data_path, fear_greed_path):
        """Remember the two CSV paths; nothing is read until ``load_and_preprocess_data``."""
        self.historical_data_path = historical_data_path
        self.fear_greed_path = fear_greed_path
        self.historical_data = None  # trade rows, set by load_and_preprocess_data
        self.fear_greed_data = None  # index rows, set by load_and_preprocess_data
        self.merged_data = None      # trades + sentiment, set by merge_datasets
        self.trader_metrics = None   # per (Account, Sentiment) stats, set by calculate_trader_metrics

    @staticmethod
    def _save_current_figure(filename):
        """Tighten, save under ``figures/`` and close the current pyplot figure."""
        plt.tight_layout()
        plt.savefig(f'figures/{filename}')
        plt.close()

    def load_and_preprocess_data(self):
        """Read both CSV files and normalise the columns later steps rely on.

        Adds a ``Date`` column (``datetime.date`` values) to both frames,
        coerces ``Size USD`` / ``Closed PnL`` / ``Execution Price`` to numeric
        and guarantees an ``Account`` column in the trade frame.

        Raises:
            FileNotFoundError: either CSV path does not exist.
            KeyError: the trade file has no timestamp or account column, or the
                index file has no ``date`` column.
        """
        print("Loading historical trader data...")
        if not os.path.exists(self.historical_data_path):
            raise FileNotFoundError(f"Historical data file not found at {self.historical_data_path}")
        self.historical_data = pd.read_csv(self.historical_data_path)
        trades = self.historical_data
        print(f"Historical columns: {trades.columns.tolist()}")

        if 'Timestamp IST' in trades.columns:
            # This column is parsed day-first; unparseable values become NaT.
            trades['Timestamp IST'] = pd.to_datetime(
                trades['Timestamp IST'], errors='coerce', dayfirst=True)
            trades['Date'] = trades['Timestamp IST'].dt.date
        else:
            # Fall back to the first column whose name mentions "time".
            time_cols = [c for c in trades.columns if 'time' in c.lower()]
            if not time_cols:
                raise KeyError("No timestamp column found in historical data.")
            col = time_cols[0]
            print(f"Parsing datetime from {col}")
            trades['Timestamp'] = pd.to_datetime(trades[col], errors='coerce')
            trades['Date'] = trades['Timestamp'].dt.date

        for col in ['Size USD', 'Closed PnL', 'Execution Price']:
            if col in trades.columns:
                trades[col] = pd.to_numeric(trades[col], errors='coerce')

        if 'Account' not in trades.columns:
            # Accept any capitalisation of "account".
            possible = [col for col in trades.columns if col.lower() == 'account']
            if not possible:
                raise KeyError("No 'Account' column found.")
            trades.rename(columns={possible[0]: 'Account'}, inplace=True)

        print("Loading Fear & Greed Index data...")
        if not os.path.exists(self.fear_greed_path):
            raise FileNotFoundError(f"Fear & Greed data file not found at {self.fear_greed_path}")
        self.fear_greed_data = pd.read_csv(self.fear_greed_path)
        if 'date' not in self.fear_greed_data.columns:
            raise KeyError("No 'date' column in Fear & Greed data.")
        self.fear_greed_data['date'] = pd.to_datetime(self.fear_greed_data['date'], errors='coerce')
        self.fear_greed_data['Date'] = self.fear_greed_data['date'].dt.date
        print(f"Historical shape: {self.historical_data.shape}, FearGreed shape: {self.fear_greed_data.shape}")

    def merge_datasets(self):
        """Attach ``fear_greed_value`` and ``sentiment`` to every trade by ``Date``.

        Trade dates without an index reading take the most recent *earlier*
        reading. The fill is done on a date-sorted calendar, not on the trade
        rows, so the result does not depend on the order of the trade file and
        never borrows a value from a later day. Trades dated before the first
        index reading (or with no date) keep NaN.

        The row order of ``historical_data`` is preserved in ``merged_data``.
        """
        print("Merging datasets...")
        fg = self.fear_greed_data
        renames = {'value': 'fear_greed_value', 'classification': 'sentiment'}
        present = [c for c in renames if c in fg.columns]
        fg_sub = (fg[['Date'] + present]
                  .dropna(subset=['Date'])
                  .drop_duplicates(subset=['Date'])
                  .rename(columns=renames))

        # Calendar = every date seen in either source, in chronological order.
        trade_dates = self.historical_data['Date'].dropna()
        calendar = pd.DataFrame({'Date': sorted(set(fg_sub['Date']) | set(trade_dates))})
        fg_filled = calendar.merge(fg_sub, on='Date', how='left')
        fill_cols = [renames[c] for c in present]
        if fill_cols:
            fg_filled[fill_cols] = fg_filled[fill_cols].ffill()

        self.merged_data = pd.merge(self.historical_data, fg_filled, on='Date', how='left')

        # Keep both columns available to downstream code even if the index file lacks one.
        for col in renames.values():
            if col not in self.merged_data.columns:
                print(f"Warning: Fear & Greed data has no source column for '{col}'; filled with NaN.")
                self.merged_data[col] = np.nan

        print(f"Merged shape: {self.merged_data.shape}, Date range: {self.merged_data['Date'].min()} to {self.merged_data['Date'].max()}")

    def calculate_trader_metrics(self):
        """Aggregate trades per (Account, sentiment) and save them as CSV.

        The result is stored in ``self.trader_metrics`` and written to
        ``output/trader_performance_metrics.csv``. Rows whose account or
        sentiment is missing are not counted. Metrics whose source column is
        absent are NaN.
        """
        print("Calculating trader metrics...")
        df = self.merged_data
        has_size = 'Size USD' in df.columns
        has_pnl = 'Closed PnL' in df.columns
        has_fg = 'fear_greed_value' in df.columns
        columns = ['Account', 'Sentiment', 'Total_Trades', 'Total_Volume_USD', 'Total_PnL',
                   'Avg_Trade_Size', 'Win_Rate', 'Avg_Fear_Greed_Value']
        rows = []
        for (acct, sent), grp in df.groupby(['Account', 'sentiment']):
            rows.append({
                'Account': acct,
                'Sentiment': sent,
                'Total_Trades': len(grp),
                'Total_Volume_USD': grp['Size USD'].sum() if has_size else np.nan,
                'Total_PnL': grp['Closed PnL'].sum() if has_pnl else np.nan,
                'Avg_Trade_Size': grp['Size USD'].mean() if has_size else np.nan,
                # Share of trades with strictly positive closed PnL.
                'Win_Rate': (grp['Closed PnL'] > 0).mean() if has_pnl else np.nan,
                'Avg_Fear_Greed_Value': grp['fear_greed_value'].mean() if has_fg else np.nan,
            })
        # Explicit columns so an empty result still produces a CSV with a header.
        self.trader_metrics = pd.DataFrame(rows, columns=columns)
        self.trader_metrics.to_csv('output/trader_performance_metrics.csv', index=False)
        print(f"Trader metrics saved: {self.trader_metrics.shape[0]} rows.")

    def analyze_sentiment_patterns(self):
        """Summarise size, PnL, price and index value per sentiment label.

        Columns missing from the merged data are skipped instead of raising.
        The table is written to ``output/sentiment_analysis_summary.csv``.

        Returns:
            pandas.DataFrame indexed by sentiment, with flattened
            ``<column>_<statistic>`` column names.
        """
        print("Aggregated sentiment patterns...")
        df = self.merged_data
        wanted = {
            'Size USD': ['count', 'sum', 'mean', 'std'],
            'Closed PnL': ['sum', 'mean', 'std'],
            'Execution Price': ['mean', 'std'],
            'fear_greed_value': ['mean'],
        }
        spec = {col: stats for col, stats in wanted.items() if col in df.columns}
        agg = df.groupby('sentiment').agg(spec).round(4)
        agg.columns = ['_'.join(col) for col in agg.columns]
        agg.to_csv('output/sentiment_analysis_summary.csv')
        return agg

    def create_visualizations(self):
        """Render the eight summary charts into ``figures/`` as PNG files.

        Raises:
            KeyError: the merged data has no usable timestamp column.
        """
        print("Creating visualizations...")
        df = self.merged_data.copy()

        # A proper datetime 'Timestamp' is needed for the hour heatmap and the
        # cumulative PnL curves.
        if 'Timestamp IST' in df.columns:
            df['Timestamp'] = pd.to_datetime(df['Timestamp IST'], errors='coerce')
        else:
            time_cols = [c for c in df.columns if 'time' in c.lower()]
            if not time_cols:
                raise KeyError("No timestamp column found in merged data.")
            df['Timestamp'] = pd.to_datetime(df[time_cols[0]], errors='coerce')
        df['Hour'] = df['Timestamp'].dt.hour
        sentiments = df['sentiment'].dropna().unique()

        # 1. Total volume by sentiment
        plt.figure()
        vol = df.groupby('sentiment')['Size USD'].sum().sort_values(ascending=False)
        bars = plt.bar(vol.index, vol.values)
        plt.title('Total Trading Volume by Sentiment')
        plt.xticks(rotation=45)
        for b in bars:
            plt.text(b.get_x() + b.get_width() / 2, b.get_height(),
                     f'${b.get_height():,.0f}', ha='center', va='bottom')
        self._save_current_figure('volume_by_sentiment.png')

        # 2. Average PnL by sentiment (green for gains, red for losses)
        plt.figure()
        pnl = df.groupby('sentiment')['Closed PnL'].mean()
        colors = ['green' if x >= 0 else 'red' for x in pnl.values]
        bars = plt.bar(pnl.index, pnl.values, color=colors, alpha=0.7)
        plt.title('Average PnL by Sentiment')
        plt.axhline(0, linestyle='--', alpha=0.5)
        plt.xticks(rotation=45)
        for b in bars:
            h = b.get_height()
            plt.text(b.get_x() + b.get_width() / 2, h, f'{h:.2f}',
                     ha='center', va='bottom' if h >= 0 else 'top')
        self._save_current_figure('avg_pnl_by_sentiment.png')

        # 3. Share of trades per sentiment
        plt.figure()
        counts = df['sentiment'].value_counts()
        plt.pie(counts.values, labels=counts.index, autopct='%1.1f%%', startangle=90)
        plt.title('Distribution of Trades by Sentiment')
        self._save_current_figure('trade_distribution_pie.png')

        # 4. Fear & Greed index over time (one value per trade date)
        plt.figure()
        daily = df.groupby('Date')['fear_greed_value'].first().reset_index()
        daily['Date'] = pd.to_datetime(daily['Date'])
        daily = daily.sort_values('Date')
        plt.plot(daily['Date'], daily['fear_greed_value'])
        plt.fill_between(daily['Date'], daily['fear_greed_value'], alpha=0.3)
        plt.title('Fear & Greed Index Over Time')
        plt.xticks(rotation=45)
        plt.axhline(25, color='red', linestyle='--', alpha=0.5)
        plt.axhline(75, color='green', linestyle='--', alpha=0.5)
        self._save_current_figure('fear_greed_over_time.png')

        # 5. Trade counts by hour of day and sentiment
        heat = df.groupby(['Hour', 'sentiment']).size().unstack(fill_value=0)
        if not heat.empty:
            plt.figure()
            sns.heatmap(heat.T, annot=True, fmt='d', cbar_kws={'label': 'Number of Trades'})
            plt.title('Trading Activity Heatmap')
            plt.xlabel('Hour')
            plt.ylabel('Sentiment')
            self._save_current_figure('heatmap_hour_sentiment.png')

        # 6. Cumulative PnL per sentiment, in chronological order
        plt.figure()
        for sent in sentiments:
            sub = df[df['sentiment'] == sent].sort_values('Timestamp')
            plt.plot(sub['Timestamp'], sub['Closed PnL'].cumsum(), label=sent)
        plt.title('Cumulative PnL by Sentiment')
        plt.legend()
        plt.xticks(rotation=45)
        self._save_current_figure('cumulative_pnl.png')

        # 7. Trade size distribution per sentiment (log-scaled density)
        plt.figure()
        for sent in sentiments:
            plt.hist(df[df['sentiment'] == sent]['Size USD'].dropna(),
                     bins=50, alpha=0.5, density=True, label=sent)
        plt.yscale('log')
        plt.title('Trade Size Distribution by Sentiment')
        plt.legend()
        self._save_current_figure('size_distribution.png')

        # 8. Win rate (share of trades with positive closed PnL) per sentiment
        plt.figure()
        labels = []
        rates = []
        for sent in sentiments:
            sub = df[df['sentiment'] == sent]
            total = len(sub)
            wins = (sub['Closed PnL'] > 0).sum()
            labels.append(sent)
            rates.append(wins / total * 100 if total > 0 else 0)
        bars = plt.bar(labels, rates)
        plt.title('Win Rate by Sentiment')
        plt.ylabel('Win Rate (%)')
        plt.xticks(rotation=45)
        for b in bars:
            plt.text(b.get_x() + b.get_width() / 2, b.get_height(),
                     f'{b.get_height():.1f}%', ha='center', va='bottom')
        self._save_current_figure('win_rate.png')
        print("Visualizations saved in figures/")

    def generate_insights(self):
        """Write a plain-text summary of the merged data to ``output/insights.txt``.

        Covers trade distribution, profitability, volume, correlations with the
        index value, mean/std PnL ratios and average PnL on extreme index days
        (<=25 and >=75).
        """
        print("Generating insights...")
        df = self.merged_data

        def _pick(series, method):
            """Label of the max/min entry, ignoring NaN; None when nothing is left."""
            series = series.dropna()
            return getattr(series, method)() if not series.empty else None

        insights = []

        # Distribution
        total = len(df)
        insights.append("Market Sentiment Distribution:")
        for sent, count in df['sentiment'].value_counts().items():
            insights.append(f"  {sent}: {count} trades ({count/total*100:.1f}%)")

        # Profitability
        stats = df.groupby('sentiment').agg({'Closed PnL': ['sum', 'mean'], 'Size USD': 'count'}).round(4)
        stats.columns = ['_'.join(col) for col in stats.columns]
        insights.append("\nProfitability by Sentiment:")
        for sent, row in stats.iterrows():
            insights.append(f"  {sent}: Total PnL=${row['Closed PnL_sum']:.2f}, Avg PnL=${row['Closed PnL_mean']:.4f}, Trades={int(row['Size USD_count'])}")

        # Volume
        vol = df.groupby('sentiment')['Size USD'].agg(['sum', 'mean']).round(2)
        insights.append("\nVolume Analysis:")
        for sent, row in vol.iterrows():
            insights.append(f"  {sent}: Total Volume=${row['sum']:.2f}, Avg Size=${row['mean']:.2f}")

        # Correlations between the index value and per-trade PnL / size
        corr_pnl = df['fear_greed_value'].corr(df['Closed PnL'])
        corr_size = df['fear_greed_value'].corr(df['Size USD'])
        insights.append(f"\nCorrelation Fear & Greed vs PnL: {corr_pnl:.4f}")
        insights.append(f"Correlation Fear & Greed vs Size: {corr_size:.4f}")

        # Headline sentiments
        mean_pnl = df.groupby('sentiment')['Closed PnL'].mean()
        sum_vol = df.groupby('sentiment')['Size USD'].sum()
        std_pnl = df.groupby('sentiment')['Closed PnL'].std()
        insights.append(f"\nMost Profitable Sentiment: {_pick(mean_pnl, 'idxmax')}")
        insights.append(f"Highest Volume Sentiment: {_pick(sum_vol, 'idxmax')}")
        insights.append(f"Lowest Risk Sentiment: {_pick(std_pnl, 'idxmin')}")

        insights.append("\nActionable Insights:")
        insights.append("  - Consider contrarian trades around extremes")
        insights.append("  - Exploit volume opportunities during fear")
        insights.append("  - Adjust risk management by sentiment")
        insights.append("  - Monitor sentiment transitions for timing")

        # Risk-adjusted: mean PnL divided by its standard deviation
        insights.append("\nRisk-Adjusted Returns:")
        for sent in mean_pnl.index:
            m = mean_pnl[sent]
            s = std_pnl[sent]
            # std is NaN for single-trade groups and 0 for constant PnL: skip both.
            if pd.notna(s) and s > 0:
                insights.append(f"  {sent}: {m/s:.4f}")

        # Extremes of the index value
        ext_fear = df[df['fear_greed_value'] <= 25]
        ext_greed = df[df['fear_greed_value'] >= 75]
        if not ext_fear.empty:
            insights.append(f"\nExtreme Fear (<=25) Avg PnL: ${ext_fear['Closed PnL'].mean():.4f}")
        if not ext_greed.empty:
            insights.append(f"Extreme Greed (>=75) Avg PnL: ${ext_greed['Closed PnL'].mean():.4f}")

        with open('output/insights.txt', 'w') as f:
            f.write("\n".join(insights))
        print("Insights saved to output/insights.txt")


In [12]:
class AdvancedTraderAnalysis:
    """Model-based analyses on the merged trade/sentiment CSV.

    Text and CSV results are written under ``output/`` and plots under
    ``figures/`` (both relative to the working directory).
    """

    def __init__(self, merged_path):
        """Load the merged CSV and make sure datetime ``Timestamp`` / ``Date`` columns exist.

        ``Timestamp`` comes from ``Timestamp IST`` when present, otherwise from
        the first column whose name contains "time".

        Raises:
            FileNotFoundError: ``merged_path`` does not exist.
            KeyError: there is no ``Date`` column and none can be derived.
        """
        if not os.path.exists(merged_path):
            raise FileNotFoundError(f"Merged data file not found at {merged_path}")
        # Only ask read_csv to parse date columns that actually exist; naming a
        # missing column in parse_dates raises before any fallback can run.
        header = pd.read_csv(merged_path, nrows=0).columns
        date_cols = [c for c in ('Timestamp IST', 'Date') if c in header]
        self.df = pd.read_csv(merged_path, parse_dates=date_cols or False, dayfirst=True)
        if 'Timestamp IST' in self.df.columns:
            self.df['Timestamp'] = pd.to_datetime(self.df['Timestamp IST'], errors='coerce')
        else:
            time_cols = [c for c in self.df.columns if 'time' in c.lower()]
            if time_cols:
                self.df['Timestamp'] = pd.to_datetime(self.df[time_cols[0]], errors='coerce')
        if 'Date' not in self.df.columns:
            if 'Timestamp' not in self.df.columns:
                raise KeyError("No 'Date' column found in merged data.")
            self.df['Date'] = self.df['Timestamp'].dt.normalize()
        self.df['Date'] = pd.to_datetime(self.df['Date'], errors='coerce')

    def predictive_model(self):
        """Fit a random forest predicting whether a trade closes with positive PnL.

        Features: ``Size USD``, ``Execution Price``, ``fear_greed_value`` and
        the hour of ``Timestamp``. The classification report and confusion
        matrix for the held-out split go to ``output/predictive_report.txt``.
        """
        print("Predictive model...")
        df = self.df.copy()
        df['Hour'] = df['Timestamp'].dt.hour
        features = ['Size USD', 'Execution Price', 'fear_greed_value', 'Hour']
        # Drop rows with unknown PnL before labelling, so a missing outcome is
        # not silently counted as a losing trade.
        dfm = df[features + ['Closed PnL']].dropna()
        X = dfm[features]
        y = (dfm['Closed PnL'] > 0).astype(int)
        # A stratified split needs both classes, each with at least two rows.
        if y.nunique() < 2 or y.value_counts().min() < 2:
            print("Predictive model skipped: not enough samples of both outcomes.")
            return
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        report = classification_report(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)
        with open('output/predictive_report.txt', 'w') as f:
            f.write(report + '\nConfusion Matrix:\n' + str(cm))
        print("Predictive report saved.")

    def time_series_forecasting(self):
        """Forecast daily mean PnL with SARIMAX(1,1,1), sentiment as exogenous input.

        The forecast is written to ``output/forecast_next10.csv``. Any error
        during fitting or forecasting is printed, not raised.
        """
        print("Time-series forecasting...")
        daily = self.df.groupby('Date').agg({'Closed PnL': 'mean', 'fear_greed_value': 'mean'}).dropna()
        try:
            model = SARIMAX(daily['Closed PnL'], exog=daily[['fear_greed_value']], order=(1, 1, 1))
            res = model.fit(disp=False)
            # Future index values are unknown, so the last observed values (up to
            # 10 days) stand in for them; the horizon shrinks if fewer days exist.
            future_exog = daily[['fear_greed_value']].iloc[-10:]
            forecast = res.forecast(steps=len(future_exog), exog=future_exog)
            forecast.to_csv('output/forecast_next10.csv')
            print("Forecast saved to output/forecast_next10.csv")
        except Exception as e:
            print(f"Forecast error: {e}")

    def detect_anomalies(self):
        """Flag unusual trades with an IsolationForest (2% contamination).

        Uses ``Size USD``, ``Closed PnL`` and ``Execution Price``; flagged rows
        are written to ``output/anomalous_trades.csv``.
        """
        print("Anomaly detection...")
        feats = ['Size USD', 'Closed PnL', 'Execution Price']
        # .copy() so the label column is added to an independent frame.
        trades = self.df[feats].dropna().copy()
        if trades.empty:
            print("Anomaly detection skipped: no complete rows.")
            return
        iso = IsolationForest(contamination=0.02, random_state=42)
        trades['anomaly'] = iso.fit_predict(trades[feats])
        anomalies = trades[trades['anomaly'] == -1]
        anomalies.to_csv('output/anomalous_trades.csv', index=False)
        print(f"Anomalies saved: {len(anomalies)} rows.")

    def sentiment_transition_analysis(self):
        """Average PnL of the first trade after each change of sentiment label.

        Trades are ordered by ``Timestamp``; a transition is a row whose
        sentiment differs from the previous row's. The From/To/Avg_PnL table is
        written to ``output/sentiment_transitions.csv``.
        """
        print("Sentiment transitions...")
        df = self.df.sort_values('Timestamp')
        df['prev_sent'] = df['sentiment'].shift(1)
        df['change'] = df['sentiment'] != df['prev_sent']
        trans = df[df['change']]
        # groupby drops rows with a missing From/To label (e.g. the very first row).
        res = trans.groupby(['prev_sent', 'sentiment'])['Closed PnL'].mean().reset_index()
        res.columns = ['From', 'To', 'Avg_PnL']
        res.to_csv('output/sentiment_transitions.csv', index=False)
        print("Sentiment transitions saved.")

    def plot_volume_and_pnl_over_time(self):
        """Plot daily total volume and daily mean PnL as two PNG files in ``figures/``."""
        print("Plot volume and avg PnL...")
        daily = self.df.groupby('Date').agg({'Size USD': 'sum', 'Closed PnL': 'mean'})

        plt.figure()
        daily['Size USD'].plot(title='Total Volume Over Time')
        plt.tight_layout()
        plt.savefig('figures/volume_time_series.png')
        plt.close()

        plt.figure()
        daily['Closed PnL'].plot(title='Average PnL Over Time')
        plt.tight_layout()
        plt.savefig('figures/avg_pnl_time_series.png')
        plt.close()
        print("Time series plots saved.")


In [None]:

class ExtendedTraderAnalysis:
    """Clustering, lead/lag, causality, event-study and network analyses.

    Works on the merged trade/sentiment CSV and, where needed, on the
    per-trader metrics file ``output/trader_performance_metrics.csv``.
    Results go to ``output/`` and ``figures/`` relative to the working directory.
    """

    METRICS_PATH = 'output/trader_performance_metrics.csv'

    def __init__(self, merged_path):
        """Load the merged CSV, build the daily series and load trader metrics if present.

        ``self.daily`` holds one row per ``Date`` with ``avg_pnl`` and ``avg_fg``
        (days where either is NaN are dropped). ``self.trader_metrics`` is None
        when the metrics file does not exist.

        Raises:
            FileNotFoundError: ``merged_path`` does not exist.
            KeyError: there is no ``Date`` column and none can be derived.
        """
        if not os.path.exists(merged_path):
            raise FileNotFoundError(f"Merged data file not found at {merged_path}")
        # Only parse date columns that exist; naming a missing column in
        # parse_dates raises before the timestamp fallback below can run.
        header = pd.read_csv(merged_path, nrows=0).columns
        date_cols = [c for c in ('Timestamp IST', 'Date') if c in header]
        self.df = pd.read_csv(merged_path, parse_dates=date_cols or False, dayfirst=True)
        if 'Timestamp IST' in self.df.columns:
            self.df['Timestamp'] = pd.to_datetime(self.df['Timestamp IST'], errors='coerce')
        else:
            tcols = [c for c in self.df.columns if 'time' in c.lower()]
            if tcols:
                self.df['Timestamp'] = pd.to_datetime(self.df[tcols[0]], errors='coerce')
        if 'Date' not in self.df.columns:
            if 'Timestamp' not in self.df.columns:
                raise KeyError("No 'Date' column found in merged data.")
            self.df['Date'] = self.df['Timestamp'].dt.normalize()
        self.df['Date'] = pd.to_datetime(self.df['Date'], errors='coerce')

        self.daily = (self.df.groupby('Date')
                      .agg(avg_pnl=('Closed PnL', 'mean'), avg_fg=('fear_greed_value', 'mean'))
                      .dropna()
                      .sort_index())
        self.trader_metrics = pd.read_csv(self.METRICS_PATH) if os.path.exists(self.METRICS_PATH) else None

    def cluster_traders(self, n_clusters=4):
        """K-means cluster the (Account, Sentiment) metric rows.

        Uses whichever of Win_Rate, Avg_Trade_Size, Total_PnL and
        Avg_Fear_Greed_Value are present, standardised. The labelled table is
        written to ``output/trader_clusters.csv``. The cluster count is capped
        at the number of usable rows.
        """
        print("Clustering traders...")
        if self.trader_metrics is None:
            print("Trader metrics missing.")
            return
        cols = [c for c in ['Win_Rate', 'Avg_Trade_Size', 'Total_PnL', 'Avg_Fear_Greed_Value']
                if c in self.trader_metrics.columns]
        if not cols:
            print("Trader metrics have no usable feature columns.")
            return
        # .copy() so the Cluster column is added to an independent frame.
        dfm = self.trader_metrics.dropna(subset=cols).copy()
        if dfm.empty:
            print("Trader metrics have no complete rows.")
            return
        X = StandardScaler().fit_transform(dfm[cols])
        k = min(n_clusters, len(dfm))
        dfm['Cluster'] = KMeans(n_clusters=k, random_state=42).fit_predict(X)
        dfm.to_csv('output/trader_clusters.csv', index=False)
        print(f"Clusters saved, sample:\n{dfm.head()}")

    def _lagged_correlations(self, max_lag):
        """Correlation of daily PnL with the sentiment series shifted by each lag.

        For lag ``k`` the sentiment series is shifted by ``k`` rows: positive
        lags pair today's PnL with sentiment ``k`` rows earlier, negative lags
        with sentiment ``|k|`` rows later. Shifts are by row of ``self.daily``,
        not by calendar day.

        Returns:
            pandas.DataFrame with columns ``lag`` and ``corr``.
        """
        series_pnl = self.daily['avg_pnl']
        series_fg = self.daily['avg_fg']
        results = [{'lag': lag, 'corr': series_fg.shift(lag).corr(series_pnl)}
                   for lag in range(-max_lag, max_lag + 1)]
        return pd.DataFrame(results, columns=['lag', 'corr'])

    def time_lag_correlation(self, max_lag=10):
        """Save and plot the sentiment/PnL cross-correlation for lags ``-max_lag..max_lag``."""
        print("Time-lag correlation...")
        df_lag = self._lagged_correlations(max_lag)
        df_lag.to_csv('output/time_lag_correlation.csv', index=False)
        plt.figure()
        plt.bar(df_lag['lag'], df_lag['corr'])
        plt.title('Cross-correlation')
        plt.xlabel('Lag')
        plt.ylabel('Correlation')
        plt.tight_layout()
        plt.savefig('figures/time_lag_correlation.png')
        plt.close()
        print("Time-lag correlation saved.")

    def granger_causality(self, maxlag=5):
        """Run Granger causality tests in both directions and save the p-values.

        ``grangercausalitytests`` checks whether the second column helps predict
        the first. The SSR F-test p-value per lag is written to
        ``output/granger_sentiment_to_pnl.txt`` and
        ``output/granger_pnl_to_sentiment.txt``. A failure in one direction is
        printed and does not stop the other.
        """
        print("Granger causality tests...")
        data = self.daily[['avg_pnl', 'avg_fg']].dropna()
        directions = [
            (['avg_pnl', 'avg_fg'], 'output/granger_sentiment_to_pnl.txt', 'Sentiment->PnL'),
            (['avg_fg', 'avg_pnl'], 'output/granger_pnl_to_sentiment.txt', 'PnL->Sentiment'),
        ]
        for columns, out_path, label in directions:
            try:
                tests = grangercausalitytests(data[columns], maxlag=maxlag, verbose=False)
                pvalues = {lag: tests[lag][0]['ssr_ftest'][1] for lag in tests}
                with open(out_path, 'w') as f:
                    f.write(str(pvalues))
                print(f"{label} p-values: {pvalues}")
            except Exception as e:
                print(f"Error: {e}")

    def _event_window(self, dates, window):
        """Collect daily PnL at each calendar-day offset around the given event dates.

        Offsets whose day is absent from ``self.daily`` are skipped. The result
        always has the columns ``event_date``, ``offset`` and ``avg_pnl``, even
        when there are no events.
        """
        df = self.daily
        rec = []
        for d in dates:
            for off in range(-window, window + 1):
                day = d + timedelta(days=off)
                if day in df.index:
                    rec.append({'event_date': d, 'offset': off, 'avg_pnl': df.at[day, 'avg_pnl']})
        return pd.DataFrame(rec, columns=['event_date', 'offset', 'avg_pnl'])

    def event_study_extreme(self, window=5):
        """Average daily PnL in a +/- ``window`` day band around extreme-sentiment days.

        Extreme fear is ``avg_fg <= 25`` and extreme greed ``avg_fg >= 75``.
        Writes ``output/event_study_fear.csv``, ``output/event_study_greed.csv``
        and ``figures/event_study_extreme.png``.
        """
        print("Event study around extremes...")
        df = self.daily
        ext_fear = df[df['avg_fg'] <= 25].index
        ext_greed = df[df['avg_fg'] >= 75].index
        agg_f = self._event_window(ext_fear, window).groupby('offset')['avg_pnl'].mean().reset_index()
        agg_g = self._event_window(ext_greed, window).groupby('offset')['avg_pnl'].mean().reset_index()

        plt.figure()
        plt.plot(agg_f['offset'], agg_f['avg_pnl'], label='Fear')
        plt.plot(agg_g['offset'], agg_g['avg_pnl'], label='Greed')
        plt.axvline(0, linestyle='--')
        plt.title('Event Study')
        plt.xlabel('Days relative')
        plt.ylabel('Avg PnL')
        plt.legend()
        plt.tight_layout()
        plt.savefig('figures/event_study_extreme.png')
        plt.close()

        agg_f.to_csv('output/event_study_fear.csv', index=False)
        agg_g.to_csv('output/event_study_greed.csv', index=False)
        print("Event study results saved.")

    def pca_tsne_trader_metrics(self):
        """Project the standardised trader metrics to 2-D with PCA and t-SNE.

        Reads the metrics CSV from disk, saves both scatter plots under
        ``figures/`` and the coordinates to ``output/trader_metrics_vis.csv``.
        """
        print("PCA/t-SNE on trader metrics...")
        if not os.path.exists(self.METRICS_PATH):
            print("Metrics missing")
            return
        dfm = pd.read_csv(self.METRICS_PATH)
        cols = [c for c in ['Win_Rate', 'Avg_Trade_Size', 'Total_PnL', 'Avg_Fear_Greed_Value']
                if c in dfm.columns]
        if len(cols) < 2:
            print("Not enough metric columns for a 2-D projection.")
            return
        dfm = dfm.dropna(subset=cols).copy()
        if len(dfm) < 3:
            print("Not enough complete rows for a 2-D projection.")
            return
        X = StandardScaler().fit_transform(dfm[cols])

        pca = PCA(n_components=2, random_state=42).fit_transform(X)
        dfm['PC1'], dfm['PC2'] = pca[:, 0], pca[:, 1]
        plt.figure()
        sns.scatterplot(x='PC1', y='PC2', data=dfm)
        plt.title('PCA')
        plt.tight_layout()
        plt.savefig('figures/pca_trader_metrics.png')
        plt.close()

        # t-SNE requires perplexity to be smaller than the number of samples.
        perplexity = min(30, len(dfm) - 1)
        tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity).fit_transform(X)
        dfm['TSNE1'], dfm['TSNE2'] = tsne[:, 0], tsne[:, 1]
        plt.figure()
        sns.scatterplot(x='TSNE1', y='TSNE2', data=dfm)
        plt.title('t-SNE')
        plt.tight_layout()
        plt.savefig('figures/tsne_trader_metrics.png')
        plt.close()

        dfm.to_csv('output/trader_metrics_vis.csv', index=False)
        print("PCA/t-SNE visualizations saved.")

    def correlation_network(self, min_corr=0.3):
        """Build a graph linking accounts whose daily mean PnL is correlated.

        An edge is added when ``|corr| >= min_corr`` (undefined correlations are
        treated as 0). The picture is saved to ``figures/trader_network.png``
        and the graph to ``output/trader_correlation_network.gexf``.
        """
        print("Building correlation network...")
        pivot = self.df.pivot_table(index='Date', columns='Account', values='Closed PnL', aggfunc='mean')
        corr = pivot.corr().fillna(0)
        accounts = corr.columns.tolist()
        G = nx.Graph()
        G.add_nodes_from(accounts)
        # Upper triangle only: each unordered pair once, no self-loops.
        for i in range(len(accounts)):
            for j in range(i + 1, len(accounts)):
                val = corr.iat[i, j]
                if abs(val) >= min_corr:
                    G.add_edge(accounts[i], accounts[j], weight=val)

        plt.figure(figsize=(8, 8))
        pos = nx.spring_layout(G, seed=42)
        weights = [abs(d['weight']) for (_, _, d) in G.edges(data=True)]
        nx.draw_networkx_nodes(G, pos, node_size=20)
        nx.draw_networkx_edges(G, pos, width=[w * 2 for w in weights], alpha=0.5)
        plt.title(f'Correlation Network |corr|>={min_corr}')
        plt.axis('off')
        plt.tight_layout()
        plt.savefig('figures/trader_network.png')
        plt.close()

        nx.write_gexf(G, 'output/trader_correlation_network.gexf')
        print(f"Network saved: nodes={G.number_of_nodes()}, edges={G.number_of_edges()}")


In [19]:
def generate_report():
    """Assemble ``report.md`` from the artefacts in ``output/`` and ``figures/``.

    Every section is best-effort: when an input file is missing or cannot be
    rendered, a placeholder line is written to the report and the reason is
    printed, so the report is always produced. Files under ``output/`` listed
    in the advanced section are embedded verbatim.
    """
    print("Generating report.md...")
    lines = []
    lines.append("# Trader Behavior & Market Sentiment Analysis Report")
    lines.append("\n## 1. Introduction")
    lines.append("This report explores relationships between trader performance and Bitcoin market sentiment (Fear & Greed index).")
    lines.append("\n## 2. Data")
    lines.append("- Historical trader data: columns include Account, Timestamp IST, Size USD, Closed PnL, Execution Price, etc.")
    lines.append("- Fear & Greed index: date, value, classification (e.g., Fear, Greed).")
    lines.append("\n## 3. Preprocessing & Merging")
    lines.append("Datasets were loaded, timestamps parsed, numeric columns converted, then merged on date with forward-filled missing sentiment/index.")

    # Summary of merged data
    try:
        df = pd.read_csv('output/merged_trader_sentiment_data.csv')
        lines.append(f"- Merged dataset shape: {df.shape}")
    except Exception as exc:
        print(f"  merged dataset summary skipped: {exc}")
        lines.append("- Merged dataset summary not found.")

    lines.append("\n## 4. Trader Metrics")
    try:
        tm = pd.read_csv('output/trader_performance_metrics.csv')
        # Render the table first so a failure leaves no half-written section.
        table = tm.head().to_markdown(index=False)
        lines.append(f"- Trader metrics: {tm.shape[0]} account-sentiment entries.")
        lines.append(table)
    except Exception as exc:
        print(f"  trader metrics skipped: {exc}")
        lines.append("- Trader metrics not available.")

    lines.append("\n## 5. Aggregated Sentiment Patterns")
    try:
        sa = pd.read_csv('output/sentiment_analysis_summary.csv', index_col=0)
        lines.append(sa.reset_index().to_markdown(index=False))
    except Exception as exc:
        print(f"  sentiment summary skipped: {exc}")
        lines.append("- Sentiment summary not available.")

    lines.append("\n## 6. Visualizations")
    figs = ['volume_by_sentiment.png', 'avg_pnl_by_sentiment.png', 'trade_distribution_pie.png',
            'fear_greed_over_time.png', 'heatmap_hour_sentiment.png', 'cumulative_pnl.png',
            'size_distribution.png', 'win_rate.png', 'volume_time_series.png', 'avg_pnl_time_series.png',
            'time_lag_correlation.png', 'event_study_extreme.png', 'pca_trader_metrics.png',
            'tsne_trader_metrics.png', 'trader_network.png']
    for fig_name in figs:
        if os.path.exists(os.path.join('figures', fig_name)):
            # Markdown links use forward slashes regardless of the OS separator.
            lines.append(f"![{fig_name}](figures/{fig_name})")

    lines.append("\n## 7. Insights")
    try:
        with open('output/insights.txt') as f:
            ins = f.read().splitlines()
        lines.append("```")
        lines.extend(ins)
        lines.append("```")
    except Exception as exc:
        print(f"  insights skipped: {exc}")
        lines.append("- Insights not available.")

    lines.append("\n## 8. Advanced Analyses")
    adv_files = ['output/predictive_report.txt', 'output/forecast_next10.csv', 'output/anomalous_trades.csv',
                 'output/sentiment_transitions.csv', 'output/time_lag_correlation.csv',
                 'output/event_study_fear.csv', 'output/event_study_greed.csv']
    for af in adv_files:
        if os.path.exists(af):
            lines.append(f"### {af}\n```")
            with open(af) as f:
                lines.append(f.read())
            lines.append("```")

    lines.append("\n## 9. Methods & Models")
    lines.append("- Classification: RandomForest to predict profitable trades using Size USD, Execution Price, FearGreed value, Hour.")
    lines.append("- Forecasting: SARIMAX on avg daily PnL with sentiment as exogenous.")
    lines.append("- Anomaly Detection: IsolationForest on Size USD, PnL, Execution Price.")
    lines.append("- Clustering: KMeans on trader performance features.")
    lines.append("- Time-lag correlation and Granger causality between sentiment and avg PnL.")
    lines.append("- Event study around extreme sentiment days.")
    lines.append("- PCA and t-SNE for visualization of trader clusters.")
    lines.append("- Correlation network among traders based on daily PnL correlations.")
    lines.append("\n## 10. Conclusions & Recommendations")
    lines.append("- Summarize key findings: which sentiments yield higher avg returns, risk profiles, predictive power of sentiment, timing strategies around extremes, clustering insights.")
    lines.append("- Recommendations: contrarian strategies, risk adjustments, monitoring sentiment transitions, anomaly monitoring.")

    with open('report.md', 'w') as f:
        f.write("\n\n".join(lines))
    print("report.md generated.")

In [None]:
# End-to-end driver. The stages must run in this order: each later stage reads
# files that an earlier stage wrote under output/.
if __name__=="__main__":
    # Input CSVs, plus the path where the merged frame is persisted for the later stages.
    hist_path='data/historical_data.csv'
    fg_path='data/fear_greed_index.csv'
    merged_path='output/merged_trader_sentiment_data.csv'

    # Stage 1: load both sources and join them on Date.
    analyzer=TraderSentimentAnalyzer(hist_path,fg_path)
    analyzer.load_and_preprocess_data()
    analyzer.merge_datasets()

    # Persist the merged frame; AdvancedTraderAnalysis and ExtendedTraderAnalysis
    # reload it from this CSV rather than sharing the in-memory frame.
    analyzer.merged_data.to_csv(merged_path,index=False)
    # calculate_trader_metrics() writes output/trader_performance_metrics.csv,
    # which ExtendedTraderAnalysis reads when it is constructed below.
    analyzer.calculate_trader_metrics()
    analyzer.analyze_sentiment_patterns()
    analyzer.create_visualizations()
    analyzer.generate_insights()

    # Stage 2: model-based analyses on the merged CSV.
    adv=AdvancedTraderAnalysis(merged_path)
    adv.predictive_model()
    adv.time_series_forecasting()
    adv.detect_anomalies()
    adv.sentiment_transition_analysis()
    adv.plot_volume_and_pnl_over_time()
    # Stage 3: clustering, lead/lag, causality, event study and network analyses.
    ext=ExtendedTraderAnalysis(merged_path)
    ext.cluster_traders(n_clusters=4)
    ext.time_lag_correlation(max_lag=10)
    ext.granger_causality(maxlag=5)
    ext.event_study_extreme(window=5)
    ext.pca_tsne_trader_metrics()
    ext.correlation_network(min_corr=0.3)

    # Stage 4: stitch whatever artefacts exist into report.md.
    generate_report()
    print("Full analysis completed. See report.md, figures/, and output/ for results.")


Loading historical trader data...
Historical columns: ['Account', 'Coin', 'Execution Price', 'Size Tokens', 'Size USD', 'Side', 'Timestamp IST', 'Start Position', 'Direction', 'Closed PnL', 'Transaction Hash', 'Order ID', 'Crossed', 'Fee', 'Trade ID', 'Timestamp']
Loading Fear & Greed Index data...
Historical shape: (211224, 17), FearGreed shape: (2644, 5)
Merging datasets...
Merged shape: (211224, 19), Date range: 2023-05-01 to 2025-05-01
Calculating trader metrics...
Trader metrics saved: 156 rows.
Aggregated sentiment patterns...
Creating visualizations...
Visualizations saved in figures/
Generating insights...
Insights saved to output/insights.txt
Predictive model...
Predictive report saved.
Time-series forecasting...
Forecast saved to output/forecast_next10.csv
Anomaly detection...
Anomalies saved: 4225 rows.
Sentiment transitions...
Sentiment transitions saved.
Plot volume and avg PnL...
Time series plots saved.
Clustering traders...
Clusters saved, sample:
                      