# Analyzing Trade History Signals for Squid Ink - Round 2

This notebook analyzes the trade history data for Squid Ink in Round 2 to look for potential trading signals.

In [None]:
# Import necessary libraries
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Add parent directories to path for imports
sys.path.append(os.path.abspath('../'))
from trade_utils import (
    load_all_trade_data, filter_product_trades, calculate_trade_volume,
    calculate_trade_value, calculate_vwap_from_trades, plot_trade_prices,
    plot_trade_quantities, plot_trade_volume, plot_trade_value, plot_vwap,
    analyze_trade_direction, analyze_trade_size_distribution, analyze_trade_price_distribution
)

# Try to import seaborn for better styling (optional dependency)
try:
    import seaborn as sns
    sns.set(style="whitegrid")
    print("Using Seaborn for plot styling")
except ImportError:
    print("Seaborn not available, using matplotlib default styling")

# Configure plots to be larger and more readable.
# Applied AFTER the seaborn theme: sns.set() rewrites matplotlib rcParams
# (font sizes, and figure size on older seaborn releases), so setting these
# first would let the theme silently overwrite them.
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 12

## 1. Load Trade History Data

First, let's load the trade history data for Round 2 and filter for Squid Ink trades.

In [None]:
# Load all trade data for Round 2 using the trade_utils loader
# (the argument 2 is presumably the round number — confirm against trade_utils)
trades = load_all_trade_data(2)

# Filter for Squid Ink trades; 'SQUID_INK' is the product key passed to the helper
squid_trades = filter_product_trades(trades, 'SQUID_INK')

# Display the first few rows (last expression, so it renders as the cell output)
squid_trades.head()

## 2. Analyze Trade Volume and Value

Let's analyze the trade volume and value over time to look for patterns.

In [None]:
# Calculate trade volume in 5-minute windows.
# set_index returns a new frame, so squid_trades keeps its 'timestamp' column;
# squid_trades_indexed is reused by the value, VWAP and 1-minute cells below.
# NOTE(review): assumes 'timestamp' is datetime-like so '5min' bucketing works — confirm
squid_trades_indexed = squid_trades.set_index('timestamp')
volume_5min = calculate_trade_volume(squid_trades_indexed, time_window='5min')

# Plot trade volume
plot_trade_volume(volume_5min, title='Squid Ink Trade Volume (5-minute windows) - Round 2')

In [None]:
# Calculate trade value in 5-minute windows.
# Depends on squid_trades_indexed from the volume cell above (run cells in order).
# NOTE(review): "value" is presumably price * quantity — confirm in trade_utils
value_5min = calculate_trade_value(squid_trades_indexed, time_window='5min')

# Plot trade value
plot_trade_value(value_5min, title='Squid Ink Trade Value (5-minute windows) - Round 2')

## 3. Calculate VWAP from Trade History

Let's calculate the Volume-Weighted Average Price (VWAP) from the trade history data.

In [None]:
# Calculate VWAP from trade history in 5-minute windows.
# Depends on squid_trades_indexed from the volume cell; vwap_5min is also
# written to CSV by the final "save" cell.
vwap_5min = calculate_vwap_from_trades(squid_trades_indexed, time_window='5min')

# Plot VWAP
plot_vwap(vwap_5min, title='Squid Ink VWAP from Trade History (5-minute windows) - Round 2')

## 4. Analyze Trade Direction

Let's analyze the direction of trades (buy/sell) to look for patterns.

In [None]:
# Count trades per direction with the shared helper; the result is reused
# by the summary cell at the end of the notebook.
direction_counts = analyze_trade_direction(squid_trades)

# Bar chart of the counts, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
direction_counts.plot(kind='bar', ax=ax)
ax.set_title('Squid Ink Trade Direction Distribution - Round 2')
ax.set_xlabel('Direction')
ax.set_ylabel('Count')
ax.grid(True)
fig.tight_layout()
plt.show()

## 5. Analyze Trade Imbalance Over Time

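Let's calculate and visualize the trade imbalance (buy volume - sell volume) over time. The imbalance is computed by summing `quantity` within each window, so it only equals buy volume minus sell volume if `quantity` is signed (positive for buys, negative for sells).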

In [None]:
# Work on a timestamp-indexed copy so the original trades frame stays untouched
squid_trades_copy = squid_trades.copy()
if squid_trades_copy.index.name != 'timestamp':
    squid_trades_copy = squid_trades_copy.set_index('timestamp')

# Per-window imbalance: the sum of 'quantity' over 5-minute buckets.
# NOTE(review): this is buy-minus-sell volume only if 'quantity' is signed — confirm
squid_trades_copy['imbalance'] = squid_trades_copy['quantity']
imbalance_5min = squid_trades_copy['imbalance'].resample('5min').sum()

# Line plot of the imbalance with a zero reference line
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(imbalance_5min.index, imbalance_5min.values)
ax.set_title('Squid Ink Trade Imbalance (5-minute windows) - Round 2')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Imbalance (Buy - Sell Volume)')
ax.axhline(y=0, color='r', linestyle='-', alpha=0.3)
ax.grid(True)
fig.tight_layout()
plt.show()

## 6. Analyze Relationship Between Trade Imbalance and Price Movement

Let's analyze the relationship between trade imbalance and subsequent price movements.

In [None]:
# 1-minute VWAP series (reused later by the trade-frequency cell)
vwap_1min = calculate_vwap_from_trades(squid_trades_indexed, time_window='1min')

# 1-minute sums of the 'imbalance' column built in the previous section
imbalance_1min = squid_trades_copy['imbalance'].resample('1min').sum()

# Percentage change of VWAP, shifted back one step so that each row holds
# the change into the NEXT window rather than the change into this one
vwap_changes = vwap_1min.pct_change().shift(-1)

# Align both series on their index and keep only rows where both are present
combined = pd.DataFrame({
    'imbalance': imbalance_1min,
    'price_change': vwap_changes,
}).dropna()

# Scatter of imbalance against the following window's price change
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(combined['imbalance'], combined['price_change'], alpha=0.5)
ax.set_title('Relationship Between Trade Imbalance and Subsequent Price Change')
ax.set_xlabel('Trade Imbalance (1-minute window)')
ax.set_ylabel('Subsequent Price Change')
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation between the two columns (reused in the summary cell)
correlation = combined['imbalance'].corr(combined['price_change'])
print(f"Correlation between trade imbalance and subsequent price change: {correlation:.4f}")

## 7. Analyze Large Trades

Let's identify and analyze large trades to see if they have predictive power for future price movements.

In [None]:
# Absolute trade size, stored on the shared timestamp-indexed copy
squid_trades_copy['abs_quantity'] = squid_trades_copy['quantity'].abs()

# Large trades = trades at or above the 95th percentile of absolute size
large_trade_threshold = squid_trades_copy['abs_quantity'].quantile(0.95)
is_large = squid_trades_copy['abs_quantity'] >= large_trade_threshold
large_trades = squid_trades_copy.loc[is_large].copy()

large_share_pct = len(large_trades) / len(squid_trades_copy) * 100
print(f"Large trade threshold (95th percentile): {large_trade_threshold}")
print(f"Number of large trades: {len(large_trades)}")
print(f"Percentage of all trades: {large_share_pct:.2f}%")

# Scatter of large trades: marker area scales with size, colour encodes
# whether 'quantity' is positive
fig, ax = plt.subplots(figsize=(14, 7))
points = ax.scatter(
    large_trades.index,
    large_trades['price'],
    s=large_trades['abs_quantity']/10,
    alpha=0.6,
    c=large_trades['quantity'] > 0,
    cmap='coolwarm',
)
ax.set_title('Large Squid Ink Trades - Round 2')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Price')
fig.colorbar(points, ax=ax, label='Direction (Blue=Sell, Red=Buy)')
ax.grid(True)
fig.tight_layout()
plt.show()

## 8. Analyze Price Impact of Large Trades

Let's analyze the price impact of large trades by looking at price movements before and after large trades.

In [None]:
# Function to calculate price changes around large trades
def calculate_price_impact(large_trades_df, all_trades_df, window_minutes=5):
    """Calculate relative VWAP changes before and after each large trade.

    Parameters
    ----------
    large_trades_df : pandas.DataFrame
        The large trades to evaluate. Must have a 'quantity' column and either
        a 'timestamp' column or an index named 'timestamp'.
    all_trades_df : pandas.DataFrame
        The full trade history that the 1-minute VWAP series is built from
        (passed to ``calculate_vwap_from_trades``). Same timestamp requirement.
    window_minutes : int, default 5
        Number of VWAP index entries to look back and ahead. This counts
        entries of the VWAP series by position, not wall-clock minutes.

    Returns
    -------
    pandas.DataFrame
        One row per large trade that has at least one VWAP entry strictly
        before and strictly after it, with columns:
        'direction' ('buy' if quantity > 0 else 'sell'),
        'before_change' (reference VWAP vs. the earliest VWAP in the look-back
        window) and 'after_change' (latest VWAP in the look-ahead window vs.
        the reference VWAP). Trades without both windows are skipped.

    Notes
    -----
    Earlier versions recorded the direction of every trade but the price
    changes only of trades with both windows, which produced columns of
    unequal length, and looked quantities up by timestamp label, which fails
    when several large trades share a timestamp. Rows are now built one
    complete record at a time from positional iteration.
    """
    # Ensure both dataframes have timestamp as index
    if large_trades_df.index.name != 'timestamp':
        large_trades_df = large_trades_df.set_index('timestamp')
    if all_trades_df.index.name != 'timestamp':
        all_trades_df = all_trades_df.set_index('timestamp')

    # Calculate VWAP in 1-minute windows
    vwap_1min = calculate_vwap_from_trades(all_trades_df, time_window='1min')
    vwap_index = vwap_1min.index

    records = []

    # Iterate positionally so duplicate timestamps yield one scalar quantity each
    for timestamp, quantity in zip(large_trades_df.index, large_trades_df['quantity']):
        # VWAP entries strictly before / strictly after the trade
        before_timestamp = vwap_index[vwap_index < timestamp][-window_minutes:]
        after_timestamp = vwap_index[vwap_index > timestamp][:window_minutes]

        # Skip the whole record (direction included) when a window is missing,
        # so all three columns always stay aligned
        if len(before_timestamp) == 0 or len(after_timestamp) == 0:
            continue

        # Reference price: first VWAP entry at or after the trade timestamp
        # (non-empty here because after_timestamp is non-empty)
        trade_price = vwap_1min.loc[vwap_index[vwap_index >= timestamp][0]]
        before_price = vwap_1min.loc[before_timestamp[0]]
        after_price = vwap_1min.loc[after_timestamp[-1]]

        records.append({
            'direction': 'buy' if quantity > 0 else 'sell',
            'before_change': (trade_price - before_price) / before_price,
            'after_change': (after_price - trade_price) / trade_price,
        })

    # Explicit columns keep the schema stable even when no trade qualifies
    impact_df = pd.DataFrame(records, columns=['direction', 'before_change', 'after_change'])

    return impact_df

# Calculate price impact of large trades
impact_df = calculate_price_impact(large_trades, squid_trades_copy, window_minutes=5)

# Display summary statistics
print("Price Impact of Large Trades:")
print("\nBefore large trades:")
print(impact_df['before_change'].describe())
print("\nAfter large trades:")
print(impact_df['after_change'].describe())

# Group by direction and calculate average price changes
# (direction_impact is reused by the summary cell)
direction_impact = impact_df.groupby('direction')[['before_change', 'after_change']].mean()
print("\nAverage price changes by direction:")
print(direction_impact)

# Plot price impact by direction.
# DataFrame.plot opens its own figure unless it is handed an Axes, so a bare
# plt.figure() beforehand would be left behind as an empty figure and the
# requested figsize would not apply to the real chart. Create the Axes
# explicitly and pass it in.
fig, ax = plt.subplots(figsize=(10, 6))
direction_impact.plot(kind='bar', ax=ax)
ax.set_title('Average Price Impact of Large Trades by Direction')
ax.set_xlabel('Trade Direction')
ax.set_ylabel('Average Price Change')
ax.grid(True)
fig.tight_layout()
plt.show()

## 9. Analyze Trade Frequency and Clustering

Let's analyze the frequency of trades and look for clustering patterns.

In [None]:
# Number of trades falling in each 1-minute window
trade_counts = squid_trades_copy.resample('1min').size()

# Time series of the per-minute trade count
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(trade_counts.index, trade_counts.values)
ax.set_title('Squid Ink Trade Frequency (1-minute windows) - Round 2')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Number of Trades')
ax.grid(True)
fig.tight_layout()
plt.show()

# Volatility proxy: rolling 5-entry standard deviation of 1-minute VWAP returns
# (vwap_1min comes from the imbalance-vs-price cell above)
vwap_1min_returns = vwap_1min.pct_change()
vwap_1min_volatility = vwap_1min_returns.rolling(window=5).std()

# Align frequency and volatility on their index; drop windows missing either
freq_vol = pd.DataFrame({
    'frequency': trade_counts,
    'volatility': vwap_1min_volatility,
}).dropna()

# Correlation between the two columns (reused in the summary cell)
freq_vol_corr = freq_vol['frequency'].corr(freq_vol['volatility'])
print(f"Correlation between trade frequency and price volatility: {freq_vol_corr:.4f}")

# Scatter of trade count against volatility
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(freq_vol['frequency'], freq_vol['volatility'], alpha=0.5)
ax.set_title('Relationship Between Trade Frequency and Price Volatility')
ax.set_xlabel('Number of Trades (1-minute window)')
ax.set_ylabel('Price Volatility (5-minute rolling std of returns)')
ax.grid(True)
fig.tight_layout()
plt.show()

## 10. Summary of Findings

Let's summarize our findings from the trade history analysis.

In [None]:
# Print summary of findings.
# Relies on results computed in earlier cells: squid_trades, correlation,
# freq_vol_corr, direction_impact and direction_counts.
print("Summary of Trade History Analysis for Squid Ink - Round 2:\n")

print(f"1. Total number of trades: {len(squid_trades)}")
print(f"2. Correlation between trade imbalance and subsequent price change: {correlation:.4f}")
print(f"3. Correlation between trade frequency and price volatility: {freq_vol_corr:.4f}")

print("\n4. Price Impact of Large Trades:")
print(direction_impact)

print("\n5. Trade Direction Distribution:")
for direction, count in direction_counts.items():
    print(f"   {direction.capitalize()}: {count} ({count / len(squid_trades) * 100:.2f}%)")

# Identify potential signals
print("\nPotential Trading Signals from Trade History:")

# Only report correlations whose magnitude exceeds 0.1
if abs(correlation) > 0.1:
    print(f"- Trade imbalance shows {correlation:.4f} correlation with subsequent price changes")
    if correlation > 0:
        print("  Signal: Buy when there's positive trade imbalance (more buys than sells)")
    else:
        print("  Signal: Buy when there's negative trade imbalance (more sells than buys)")

if abs(freq_vol_corr) > 0.1:
    print(f"- Trade frequency shows {freq_vol_corr:.4f} correlation with price volatility")
    print("  Signal: Higher trade frequency may indicate upcoming volatility")

# Check if large buys predict price increases
if 'buy' in direction_impact.index and direction_impact.loc['buy', 'after_change'] > 0:
    print(f"- Large buy trades are followed by average price increases of {direction_impact.loc['buy', 'after_change']*100:.2f}%")
    print("  Signal: Consider buying after observing large buy trades")

# Check if large sells predict price decreases
if 'sell' in direction_impact.index and direction_impact.loc['sell', 'after_change'] < 0:
    # The value is negative in this branch; report its magnitude so the message
    # does not read as a double negative ("decreases of -0.12%").
    print(f"- Large sell trades are followed by average price decreases of {abs(direction_impact.loc['sell', 'after_change'])*100:.2f}%")
    print("  Signal: Consider selling after observing large sell trades")

## 11. Save Processed Data for Future Use

Let's save the processed data for future use.

In [None]:
# Make sure the output directory exists before writing anything
output_dir = '../data'
os.makedirs(output_dir, exist_ok=True)

# 5-minute trade imbalance series
imbalance_path = os.path.join(output_dir, 'squid_trade_imbalance_5min.csv')
imbalance_5min.to_csv(imbalance_path)
print(f"Trade imbalance data saved to {imbalance_path}")

# 5-minute VWAP series
vwap_path = os.path.join(output_dir, 'squid_trade_vwap_5min.csv')
vwap_5min.to_csv(vwap_path)
print(f"VWAP data saved to {vwap_path}")

# Large trades (rows at or above the 95th-percentile size threshold)
large_trades_path = os.path.join(output_dir, 'squid_large_trades.csv')
large_trades.to_csv(large_trades_path)
print(f"Large trades data saved to {large_trades_path}")