In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm

In [2]:
# Load the feature table from CSV and convert its 'date' column to datetimes
# in a single chained expression.
df = (
    pd.read_csv('trader_sentiment_features.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
print("Dataset loaded successfully.")

Dataset loaded successfully.


In [3]:
# --- 1. Correlation Analysis (Detailed) ---
print("\n--- 1. Detailed Correlation Analysis ---")

# Rows (sentiment metrics) and columns (PnL metrics) of the table reported below.
sentiment_metrics = ['index_value', 'lag_index_value', 'sentiment_change', 'is_greed']
pnl_metrics = ['total_closed_pnl', 'avg_closed_pnl', 'pnl_per_trade', 'pnl_per_volume']

# Every column that enters the correlation matrix: sentiment metrics, PnL
# metrics, then the two volume/size features.
correlation_cols = [*sentiment_metrics, *pnl_metrics, 'total_trade_volume', 'net_trade_size']

# Pairwise correlations (DataFrame.corr with its default method), then the
# sentiment-by-PnL sub-block of that matrix.
corr_matrix = df[correlation_cols].corr()
pnl_corr = corr_matrix.loc[sentiment_metrics, pnl_metrics]

print("\nCorrelation between Sentiment Metrics and PnL Metrics:")
print(pnl_corr.to_markdown(floatfmt=".3f"))


--- 1. Detailed Correlation Analysis ---

Correlation between Sentiment Metrics and PnL Metrics:
|                  |   total_closed_pnl |   avg_closed_pnl |   pnl_per_trade |   pnl_per_volume |
|:-----------------|-------------------:|-----------------:|----------------:|-----------------:|
| index_value      |             -0.083 |            0.037 |           0.037 |            0.064 |
| lag_index_value  |             -0.108 |            0.019 |           0.019 |            0.043 |
| sentiment_change |              0.068 |            0.050 |           0.050 |            0.058 |
| is_greed         |             -0.142 |            0.023 |           0.023 |            0.032 |


In [4]:
# --- 2. Grouped Analysis by Classification ---
print("\n--- 2. Grouped Analysis by Sentiment Classification ---")

# Define a custom order for classification so the grouped table is sorted from
# most fearful to most greedy instead of alphabetically.
# Note: any label not listed here becomes NaN when converted to Categorical.
classification_order = ['Extreme Fear', 'Fear', 'Neutral', 'Greed', 'Extreme Greed']
df['classification'] = pd.Categorical(df['classification'], categories=classification_order, ordered=True)

# Group by classification and calculate mean, median and row count per metric.
# observed=False is passed explicitly: it keeps the current behaviour (every
# category in classification_order gets a row, even if it has no data) and
# silences the pandas FutureWarning about the changing default for
# categorical group keys.
grouped_analysis = df.groupby('classification', observed=False)[
    ['total_closed_pnl', 'avg_closed_pnl', 'pnl_per_trade', 'pnl_per_volume', 'trade_count']
].agg(['mean', 'median', 'count'])

print("\nMean and Median Trader Performance by Sentiment Classification:")
print(grouped_analysis.to_markdown(floatfmt=".2f"))



--- 2. Grouped Analysis by Sentiment Classification ---

Mean and Median Trader Performance by Sentiment Classification:
| classification   |   ('total_closed_pnl', 'mean') |   ('total_closed_pnl', 'median') |   ('total_closed_pnl', 'count') |   ('avg_closed_pnl', 'mean') |   ('avg_closed_pnl', 'median') |   ('avg_closed_pnl', 'count') |   ('pnl_per_trade', 'mean') |   ('pnl_per_trade', 'median') |   ('pnl_per_trade', 'count') |   ('pnl_per_volume', 'mean') |   ('pnl_per_volume', 'median') |   ('pnl_per_volume', 'count') |   ('trade_count', 'mean') |   ('trade_count', 'median') |   ('trade_count', 'count') |
|:-----------------|-------------------------------:|---------------------------------:|--------------------------------:|-----------------------------:|-------------------------------:|------------------------------:|----------------------------:|------------------------------:|-----------------------------:|-----------------------------:|-------------------------------:|--------

  grouped_analysis = df.groupby('classification')[


In [5]:
# --- 3. Statistical Significance (ANOVA) ---
# Check if the mean total_closed_pnl is significantly different across sentiment classifications
print("\n--- 3. Statistical Significance (ANOVA) ---")

# Labels that actually occur in the data; computed once instead of once per
# candidate label.
present_classes = set(df['classification'].dropna().unique())

# One PnL sample per occurring classification, in classification_order.
pnl_by_class = [
    df.loc[df['classification'] == c, 'total_closed_pnl'].dropna()
    for c in classification_order
    if c in present_classes
]
# Drop samples that became empty after removing missing PnL values.
pnl_by_class = [sample for sample in pnl_by_class if len(sample) > 0]

# Bind the results up front so later cells that report f_statistic / p_value
# still run (showing NaN) when the test cannot be computed.
f_statistic = p_value = float('nan')

# Perform one-way ANOVA
try:
    f_statistic, p_value = stats.f_oneway(*pnl_by_class)
except (ValueError, TypeError) as e:
    # scipy raises TypeError (not ValueError) when fewer than two samples are supplied.
    print(f"Could not perform ANOVA: {e}")
else:
    print(f"ANOVA F-statistic for Total PnL across Sentiment Classes: {f_statistic:.3f}")
    print(f"ANOVA P-value for Total PnL across Sentiment Classes: {p_value:.3f}")
    if p_value < 0.05:
        print("Conclusion: The mean daily total PnL is significantly different across the sentiment classifications.")
    else:
        print("Conclusion: There is no statistically significant difference in mean daily total PnL across the sentiment classifications.")


--- 3. Statistical Significance (ANOVA) ---
ANOVA F-statistic for Total PnL across Sentiment Classes: 2.769
ANOVA P-value for Total PnL across Sentiment Classes: 0.027
Conclusion: The mean daily total PnL is significantly different across the sentiment classifications.


In [6]:
# --- 4. Regression Analysis ---
print("\n--- 4. Regression Analysis: Sentiment Index vs. Total PnL ---")

# Dependent variable (Y): Total Closed PnL
Y = df['total_closed_pnl']
# Independent variable (X): Fear/Greed Index Value, with a constant column
# added so the fitted model includes an intercept.
X = sm.add_constant(df['index_value'])

# Ordinary least squares fit of Y on X; print the full text summary.
model = sm.OLS(endog=Y, exog=X).fit()
print(model.summary().as_text())


--- 4. Regression Analysis: Sentiment Index vs. Total PnL ---
                            OLS Regression Results                            
Dep. Variable:       total_closed_pnl   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                     3.266
Date:                Mon, 03 Nov 2025   Prob (F-statistic):             0.0714
Time:                        17:45:33   Log-Likelihood:                -6022.3
No. Observations:                 478   AIC:                         1.205e+04
Df Residuals:                     476   BIC:                         1.206e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------

In [7]:
# --- 5. Regression Analysis: Lagged Sentiment Index vs. Total PnL ---
print("\n--- 5. Regression Analysis: Lagged Sentiment Index vs. Total PnL ---")

# Dependent variable (Y): Total Closed PnL
Y = df['total_closed_pnl']
# Independent variable (X_lag): Lagged Fear/Greed Index Value, with a constant
# column added so the fitted model includes an intercept.
X_lag = sm.add_constant(df['lag_index_value'])

# Ordinary least squares fit of Y on X_lag; print the full text summary.
model_lag = sm.OLS(endog=Y, exog=X_lag).fit()
print(model_lag.summary().as_text())



--- 5. Regression Analysis: Lagged Sentiment Index vs. Total PnL ---
                            OLS Regression Results                            
Dep. Variable:       total_closed_pnl   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     5.594
Date:                Mon, 03 Nov 2025   Prob (F-statistic):             0.0184
Time:                        17:46:13   Log-Likelihood:                -6021.1
No. Observations:                 478   AIC:                         1.205e+04
Df Residuals:                     476   BIC:                         1.205e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------

In [8]:

# --- Save Analysis Results ---
# The whole report is assembled in memory before the file is opened, so a
# failure while formatting any section (e.g. a result from an earlier cell is
# missing) does not leave a truncated or half-written report on disk.
anova_conclusion = (
    'The mean daily total PnL is significantly different across the sentiment classifications.'
    if p_value < 0.05
    else 'There is no statistically significant difference in mean daily total PnL across the sentiment classifications.'
)

report_parts = [
    "# Sentiment and Trader Performance Analysis Results\n\n",

    "## 1. Detailed Correlation Analysis\n",
    "Correlation between Sentiment Metrics and PnL Metrics:\n",
    pnl_corr.to_markdown(floatfmt=".3f") + "\n\n",

    "## 2. Grouped Analysis by Sentiment Classification\n",
    "Mean and Median Trader Performance by Sentiment Classification:\n",
    grouped_analysis.to_markdown(floatfmt=".2f") + "\n\n",

    "## 3. Statistical Significance (ANOVA)\n",
    f"ANOVA F-statistic for Total PnL across Sentiment Classes: {f_statistic:.3f}\n",
    f"ANOVA P-value for Total PnL across Sentiment Classes: {p_value:.3f}\n",
    f"Conclusion: {anova_conclusion}\n\n",

    "## 4. Regression Analysis: Sentiment Index vs. Total PnL\n",
    model.summary().as_text() + "\n\n",

    "## 5. Regression Analysis: Lagged Sentiment Index vs. Total PnL\n",
    model_lag.summary().as_text() + "\n\n",
]

# An explicit encoding keeps the written bytes independent of the platform's
# default locale encoding.
with open('sentiment_performance_analysis_results.md', 'w', encoding='utf-8') as f:
    f.write("".join(report_parts))

print("\nAnalysis complete. Results saved to sentiment_performance_analysis_results.md.")


Analysis complete. Results saved to sentiment_performance_analysis_results.md.
