In [None]:
import dask
import dask.dataframe as dd
# Select the cuDF backend for dask.dataframe before any frame is created.
DASK_OPTIONS = {"dataframe.backend": "cudf"}
dask.config.set(DASK_OPTIONS)

# Location of the EUR/GBP tick data (Parquet).
EURGBP_TICK_PARQUET = '/home/edoardo/Desktop/python_dir/data/eurgbp_tick.parquet'

# Dask collection over the tick file; later cells call .compute() on results
# derived from it.
eurgbp_ticks = dd.read_parquet(EURGBP_TICK_PARQUET)

In [None]:
import seaborn as sns

# Scale factor that turns an ask/bid price difference into the pip units used
# on the plot axes below.
PIPS_PER_PRICE_UNIT = 10000

# These columns are added to `eurgbp_ticks` itself (rather than to a new frame)
# because the following cell reads `spread` and `hour` from it.
eurgbp_ticks['spread'] = (
    eurgbp_ticks['askPrice'] - eurgbp_ticks['bidPrice']
) * PIPS_PER_PRICE_UNIT

# Parse the raw timestamp column, then derive the hour of day from it.
eurgbp_ticks['datetime'] = dd.to_datetime(eurgbp_ticks['timestamp'])
eurgbp_ticks['hour'] = eurgbp_ticks['datetime'].dt.hour

# Mean spread per hour. .compute() evaluates the Dask graph; .to_pandas()
# converts the aggregated result so it can be handed to seaborn.
hourly_spread = eurgbp_ticks.groupby('hour')['spread'].mean().compute().to_pandas()

# `sns.set` is only an alias of `set_theme`; call the preferred name directly.
sns.set_theme(style="whitegrid")

# Bar plot of the hourly averages.
ax = sns.barplot(x=hourly_spread.index, y=hourly_spread.values, color="skyblue")
ax.set(
    xlabel="Hour of the Day",
    ylabel="Average Spread (Pips)",
    title="Average EUR/GBP Spread by Hour",
)

# Rotate the existing tick labels in place. This avoids re-assigning the labels
# through set_xticklabels() and keeps the cell from echoing a list of Text
# objects as its output.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_horizontalalignment('right')


In [None]:
import matplotlib.pyplot as plt

# Fixed lower bound of the "recent" window.
# NOTE(review): this was previously described as "the last 3 months", but the
# cutoff is a hard-coded date and does not move with the data — adjust as needed.
RECENT_CUTOFF = '2025-01-01'

# Filter into a new name instead of overwriting `eurgbp_ticks`, so that earlier
# cells still see the full data set if they are re-run after this one.
eurgbp_ticks_recent = eurgbp_ticks[eurgbp_ticks['timestamp'] >= RECENT_CUTOFF]

# Materialize the filtered rows. Despite the historical `_pd` suffix this is a
# backend (cuDF) frame, not pandas — hence the explicit .to_pandas() below.
# .compute() already returns a fresh object, so no extra .copy() is required.
eurgbp_ticks_pd = eurgbp_ticks_recent.compute()

# Maximum spread per hour, converted to pandas for plotting and ordered by hour
# so the result does not depend on the group-by's output order.
hourly_max_spread = (
    eurgbp_ticks_pd.groupby('hour')['spread'].max().to_pandas().sort_index()
)

# One figure, two panels side by side, addressed through explicit Axes handles.
fig, (box_ax, bar_ax) = plt.subplots(1, 2, figsize=(18, 6))

# Left panel: box plot of the individual spread values.
# (A box plot has no frequency axis, so no y-label is set here.)
sns.boxplot(x=eurgbp_ticks_pd['spread'].to_numpy(), color="skyblue", ax=box_ax)
box_ax.set_title("Distribution of EUR/GBP Spread (Pips)", fontsize=16)
box_ax.set_xlabel("Spread (Pips)", fontsize=12)
box_ax.grid(True)

# Right panel: bar chart of the maximum spread observed in each hour.
bar_ax.bar(hourly_max_spread.index, hourly_max_spread.values, color="lightcoral")
bar_ax.set_title("Maximum EUR/GBP Spread by Hour", fontsize=16)
bar_ax.set_xlabel("Hour of the Day", fontsize=12)
bar_ax.set_ylabel("Max Spread (Pips)", fontsize=12)
bar_ax.set_xticks(hourly_max_spread.index)  # one tick per hour present in the data
bar_ax.grid(True)

fig.tight_layout()
plt.show()