In [12]:
https://colab.research.google.com/drive/1-pNX4D2cA5KnHDE8yP8sSszfNz-luZQd?usp=sharing

SyntaxError: invalid syntax (ipython-input-2590369812.py, line 1)

In [2]:
import pandas as pd

In [3]:
# 1. Load the datasets
try:
    df_sentiment = pd.read_csv('fear_greed_index.csv')
    df_trades = pd.read_csv('historical_data.csv')
except FileNotFoundError as e:
    print(f"Error loading file: {e}")
    exit()

In [4]:
# 2. Data Preparation and Cleaning

# --- Sentiment Data Preparation ---
# Convert 'date' column to datetime and prepare for merging
df_sentiment['Date'] = pd.to_datetime(df_sentiment['date'])
df_sentiment = df_sentiment.rename(columns={'date': 'Original_Date_String'})
df_sentiment = df_sentiment[['Date', 'classification', 'value']]
df_sentiment = df_sentiment.drop_duplicates(subset=['Date']) # Ensure one sentiment per day

In [5]:
# --- Trades Data Preparation ---
# Convert 'Timestamp IST' to datetime, handling potential format issues
df_trades['Timestamp IST'] = pd.to_datetime(df_trades['Timestamp IST'], format='%d-%m-%Y %H:%M', errors='coerce')
df_trades = df_trades.dropna(subset=['Timestamp IST']) # Remove rows where conversion failed

# Extract the date part to create the merging key
df_trades['Date'] = df_trades['Timestamp IST'].dt.date
df_trades['Date'] = pd.to_datetime(df_trades['Date'])

In [6]:
# 3. Trade Aggregation: Calculate total daily PnL and trade count per account
df_account_performance = df_trades.groupby(['Account', 'Date']).agg(
    Total_Daily_PnL=('Closed PnL', 'sum'),
    Num_Daily_Trades=('Order ID', 'count')
).reset_index()

In [7]:
# 4. Merging DataFrames
# Merge the aggregated performance data with the sentiment data
df_merged = pd.merge(
    df_account_performance,
    df_sentiment,
    on='Date',
    how='inner' # Only keep records where both trade and sentiment data exists
)

In [8]:
# 5. Final Aggregation: Average PnL and Avg No. Daily trades by classification
# Calculate the average PnL and average number of trades for each sentiment classification
df_sentiment_stats = df_merged.groupby('classification').agg(
    Avg_Daily_PnL=('Total_Daily_PnL', 'mean'),
    Avg_Daily_Trades=('Num_Daily_Trades', 'mean'),
).reset_index()

In [9]:
# Sort by Avg_Daily_PnL for presentation
df_sentiment_stats_sorted = df_sentiment_stats.sort_values(by='Avg_Daily_PnL', ascending=False)

# Display the final results
print("--- Final Analysis: Average PnL and Trades by Sentiment Classification ---")
print(df_sentiment_stats_sorted)

--- Final Analysis: Average PnL and Trades by Sentiment Classification ---
  classification  Avg_Daily_PnL  Avg_Daily_Trades
2           Fear    5328.818161         98.153968
1  Extreme Greed    5161.922644         76.030418
0   Extreme Fear    4619.439053        133.750000
4        Neutral    3438.618818        100.228723
3          Greed    3318.100730         77.628086


In [10]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Sort by Avg_Daily_Trades for the bar chart
df_trades_sorted = df_sentiment_stats.sort_values(by='Avg_Daily_Trades', ascending=False)
# Code for Horizontal Bar Chart of Average Daily Trades
plt.figure(figsize=(10, 6))
sns.barplot(
    x='Avg_Daily_Trades',
    y='classification',
    data=df_trades_sorted,
    palette='magma'
)
plt.title('Average Number of Daily Trades per Account by Market Sentiment')
plt.xlabel('Average Number of Daily Trades')
plt.ylabel('Sentiment Classification')
plt.tight_layout()
plt.savefig('avg_daily_trades_by_sentiment_chart.png')
plt.close()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(


In [11]:
# Filter out extreme outliers for better visualization of the main distribution
df_plot = df_merged[
    (df_merged['Total_Daily_PnL'] < 50000) &
    (df_merged['Total_Daily_PnL'] > -50000)
]

# Code for Box Plot for PnL Distribution
plt.figure(figsize=(12, 6))
sns.boxplot(
    x='Total_Daily_PnL',
    y='classification',
    data=df_plot.sort_values(by='Total_Daily_PnL', ascending=False),
    palette='Spectral',
    showfliers=False # Do not show remaining outliers for clarity of the main body
)
plt.title('Distribution of Daily PnL per Account by Market Sentiment (Excluding Extreme Outliers)')
plt.xlabel('Daily PnL (USD)')
plt.ylabel('Sentiment Classification')
plt.grid(axis='x', linestyle='--')
plt.tight_layout()
plt.savefig('pnl_distribution_boxplot.png')
plt.close()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
