# Imports

In [9]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
from pylab import rcParams
from tqdm.autonotebook import tqdm

import core.signal_processing as sp
import vendors.kibot.utils as kut
import vendors.particle_one.price_movement_analysis as pma

# import vendors.particle_one.PartTask269_liquidity_analysis_utils as lau

# Plot styling: apply seaborn's defaults, then set a wide default figure size.
# The order of the two statements is kept from the original cell.
sns.set()
rcParams.update({"figure.figsize": (20, 5)})

In [11]:
# Second argument passed to `sp.rolling_zscore` in the single-symbol daily analysis.
# NOTE(review): presumably the window/smoothing parameter of the rolling z-score —
# confirm against core.signal_processing.
TAU = 2

# Load CME metadata

In [12]:
# TODO: Replace the code in this section with the library code from #269 once it is merged into master.

In [13]:
# Location of the CME product slate export with contract specs.
# Alternative local path that was previously used:
#   /data/prices/product_slate_export_with_contract_specs_20190905.csv
_PRODUCT_SPECS_PATH = "s3://default00-bucket/cme/product_slate_export_with_contract_specs_20190905.csv"

# One row per product; read straight from the CSV with default parsing options.
product_list = pd.read_csv(_PRODUCT_SPECS_PATH)

In [14]:
# Preview the first rows of the product specs table.
product_list.head()

Unnamed: 0,Clearing,Globex,Floor,Clearport,Product Name,Product Group,Sub Group,Category,Sub Category,Cleared As,...,Delivery Procedure Link,Trade At Marker Or Trade At Settlement Rules Link,Minimum Price Fluctuation Link,Listed Contracts Link,Termination Of Trading_0,Termination Of Trading Link,Reportable Positions,Termination Of Trading_1,CME Globex:,Reportable Positions Link
0,ED,GE,,ED,Eurodollar Futures,Interest Rate,Stirs,,,Futures,...,,,,,,,,,,
1,25,ZF,,25,5-Year T-Note Futures,Interest Rate,US Treasury,,,Futures,...,,,,,,,,,,
2,21,ZN,,21,10-Year T-Note Futures,Interest Rate,US Treasury,,,Futures,...,,,,,,,,,,
3,26,ZT,,26,2-Year T-Note Futures,Interest Rate,US Treasury,,,Futures,...,,,,,,,,,,
4,ES,ES,,ES,E-mini S&P 500 Futures,Equities,US Index,,,Futures,...,,,,,,,,,,


In [15]:
# Number of products listed under each product group.
product_list["Product Group"].value_counts()

Energy           892
Equities         100
Agriculture       57
FX                55
Metals            44
Interest Rate     30
Name: Product Group, dtype: int64

In [16]:
# Index the product table by product group so that a whole group can be
# selected with `product_list.loc[<group>]`.
# Rebinding instead of `inplace=True`, together with the column guard, keeps
# this cell safe to re-run: a second execution is a no-op rather than a
# KeyError on the already-consumed "Product Group" column.
if "Product Group" in product_list.columns:
    product_list = product_list.set_index("Product Group")

In [17]:
# Product groups analysed in this notebook (a subset of the groups counted above).
commodity_groups = ["Energy", "Agriculture", "Metals"]

In [18]:
# Map each commodity group to the array of Globex symbols of its products.
# `product_list` is indexed by product group at this point.
commodity_symbols = {
    group_name: product_list.loc[group_name, "Globex"].values
    for group_name in commodity_groups
}

In [19]:
# Show the Globex symbols collected for each commodity group.
commodity_symbols

{'Energy': array(['CL', 'NG', 'NN', 'N9L', 'D2L', 'HO', 'RB', 'H2L', 'HH', 'BZ',
        'V3L', 'BK', 'CSX', 'HTT', 'B0', 'NPG', 'WTT', 'B6L', 'E4L', 'HP',
        'AL1', 'AFF', 'EJL', 'LT', 'AD0', 'A1R', 'R7L', 'D4L', 'AW6', 'CU',
        'ADB', 'AC0', 'L3L', 'A4P', 'ME', 'CY', 'WOL', 'A8K', 'T7K', 'ZGL',
        'MTF', 'LPS', 'T3L', 'AE5', 'AYV', 'MFB', 'AU6', 'RBB', 'AWJ',
        'POL', 'A7Q', 'IDL', 'PWL', 'AZ9', 'RVR', 'ZJL', 'TL', 'A46', 'GZ',
        'RLX', 'K4L', 'FTL', 'HOB', 'GCU', 'CRB', 'WCW', 'AOL', 'JDL',
        'EN', 'AA6', 'PD', 'AGA', 'FO', 'N1B', 'NOI', 'MPX', 'NHN', 'NL',
        'JKM', 'MTS', 'AB3', 'NR', 'AH3', 'AP8', 'A7E', 'PGN', 'AFH',
        'AZ1', 'A8I', 'MNC', 'UV', 'SE', 'MAF', 'B7H', 'J4L', 'AYX', 'QM',
        'D3L', 'IN', 'MEO', 'QG', 'EXR', 'PM', 'TTF', 'MFF', 'PGG', 'D7L',
        'BOO', 'MXB', 'EVC', 'A5C', 'AA8', 'EWG', 'HB', 'A8O', 'RKA',
        'ARE', 'GCI', 'A1M', 'AL6', 'A1L', 'A0D', 'MBR', 'APS', 'N3L',
        'JET', 'VR', 'A9N', 'PH', 'PEL'

# Daily price movements

## Load kibot commodity daily prices

In [20]:
# Load the kibot metadata table; its "Symbol" column is matched against the CME symbols below.
# NOTE(review): the recorded run of this cell failed with `PermissionError: Access Denied`,
# so the loader presumably needs storage credentials — confirm before re-running.
daily_metadata = kut.read_metadata2()
daily_metadata.head(3)

PermissionError: Access Denied

In [None]:
# Total number of entries in the metadata "Symbol" column.
len(daily_metadata["Symbol"])

In [None]:
# Number of distinct symbols; compare with the length above to spot duplicates.
daily_metadata["Symbol"].nunique()

In [None]:
# Number of Energy symbols taken from the CME product list.
len(commodity_symbols["Energy"])

In [None]:
# Energy symbols present in both the kibot metadata and the CME product list.
energy_symbols_kibot = np.intersect1d(
    ar1=daily_metadata["Symbol"].values,
    ar2=commodity_symbols["Energy"],
)
energy_symbols_kibot

In [None]:
# How many Energy symbols are also listed in the kibot metadata.
len(energy_symbols_kibot)

In [None]:
# For every commodity group, keep only the CME symbols that also appear in
# the kibot metadata.
commodity_symbols_kibot = {
    group_name: np.intersect1d(daily_metadata["Symbol"].values, cme_symbols)
    for group_name, cme_symbols in commodity_symbols.items()
}

In [None]:
# Show the per-group symbols that are available in the kibot metadata.
commodity_symbols_kibot

In [None]:
# Number of kibot-available symbols in each commodity group.
{
    group_name: len(kibot_symbols)
    for group_name, kibot_symbols in commodity_symbols_kibot.items()
}

In [None]:
# Flatten the per-group symbol arrays into one list of symbols to load.
comm_list = [
    symbol_name
    for group_symbols in commodity_symbols_kibot.values()
    for symbol_name in group_symbols
]
comm_list[:5]

In [None]:
# Path template handed to the loader.
# NOTE(review): the `%s` is presumably substituted with each symbol — confirm in vendors.kibot.utils.
file_name = "/data/kibot/All_Futures_Continuous_Contracts_daily/%s.csv.gz"

# Load data for every selected commodity symbol; the result is looked up by symbol below.
daily_price_dict_df = kut.read_multiple_symbol_data(
    comm_list, file_name, nrows=None
)

# Sanity check: last two rows loaded for the "CL" symbol.
daily_price_dict_df["CL"].tail(2)

## Largest movements for a specific symbol

In [None]:
# There is a pma.get_top_movements_for_symbol() function that
# implements this code and the code below. I am not using it
# in this chapter to provide a clearer view of the algorithm.

In [None]:
# Symbol analysed in this section; also interpolated into the plot title below.
symbol = "CL"

In [None]:
# Daily price table for the selected symbol.
cl_prices = daily_price_dict_df[symbol]

In [None]:
# Open-to-close move of each row: close minus open.
cl_prices_diff = cl_prices["close"].sub(cl_prices["open"])

In [None]:
# Normalise the open-to-close moves with the project's rolling z-score, parameterised by TAU.
# NOTE(review): the exact windowing semantics live in core.signal_processing — confirm there.
zscored_cl_prices_diff = sp.rolling_zscore(cl_prices_diff, TAU)
zscored_cl_prices_diff.head()

In [None]:
# Use magnitudes so that large up-moves and large down-moves rank alike.
abs_zscored_cl_prices_diff = zscored_cl_prices_diff.abs()

In [None]:
# Largest absolute z-scored move in the series.
abs_zscored_cl_prices_diff.max()

In [None]:
# The 100 largest absolute z-scored moves, ordered from largest to smallest.
top_100_movements_cl = abs_zscored_cl_prices_diff.sort_values(ascending=False).iloc[
    :100
]

In [None]:
# Bar chart of the top-100 moves, one bar per index entry.
ax = top_100_movements_cl.plot(kind="bar")
# Shorten every tick label to its first 10 characters
# (presumably the date part of the timestamp label — confirm on the rendered plot).
ax.set_xticklabels([tick.get_text()[:10] for tick in ax.get_xticklabels()])
ax.set_title(
    f"Largest price movements in a single day (in z-score space) for {symbol} symbol"
)
plt.show()

In [None]:
# Count how many of the top-100 movements fall in each calendar year.
# `value_counts(sort=False)` does not guarantee chronological order, so the
# counts are sorted by year explicitly to get a readable time axis.
(
    top_100_movements_cl.index.year.value_counts(sort=False)
    .sort_index()
    .plot(kind="bar")
)
plt.title("How many of the top-100 price movements occurred during each year")
plt.show()

## Largest movements for energy group

In [None]:
# Commodity group analysed in this section.
group = "Energy"

In [None]:
# Symbols of the selected group that are available in the kibot metadata.
commodity_symbols_kibot[group]

In [None]:
# Z-scored price differences for every symbol of the group, concatenated
# along axis=1.
# Built in a single step so that `zscored_diffs` is only ever bound to the
# concatenated result: the cell can be re-run safely, and the comprehension
# variable does not overwrite the notebook-level `symbol`.
zscored_diffs = pd.concat(
    [
        pma.get_zscored_prices_diff(daily_price_dict_df, group_symbol)
        for group_symbol in commodity_symbols_kibot[group]
    ],
    axis=1,
)
zscored_diffs.head()

In [None]:
# Row-wise mean across the group's symbols; missing values are skipped.
mean_zscored_diffs = zscored_diffs.mean(axis=1, skipna=True)

In [None]:
# First rows of the group-average series.
mean_zscored_diffs.head()

In [None]:
# Last rows of the group-average series.
mean_zscored_diffs.tail()

In [None]:
# The 100 largest group-average values, in descending order.
# NOTE(review): the single-symbol section ranks by absolute z-score, whereas this
# ranks signed means, so large negative moves are not surfaced here — confirm this is intended.
mean_zscored_diffs.sort_values(ascending=False).head(100)

## Largest movements for each group

In [None]:
# Top movements of every commodity group, computed by the library helper
# from the daily price data.
top_100_movements_by_group = {
    group_name: pma.get_top_movements_by_group(
        daily_price_dict_df, commodity_symbols_kibot, group_name
    )
    for group_name in commodity_symbols_kibot
}

In [None]:
# Groups for which top movements were computed.
top_100_movements_by_group.keys()

In [None]:
# First entries of the Energy group's top movements.
top_100_movements_by_group["Energy"].head()

In [None]:
# First entries of the Agriculture group's top movements.
top_100_movements_by_group["Agriculture"].head()

In [None]:
# First entries of the Metals group's top movements.
top_100_movements_by_group["Metals"].head()

# 5-minute price movements

## Load 1-minute prices

In [None]:
# Load the kibot metadata table used for the 1-minute data section.
minutely_metadata = kut.read_metadata1()

In [None]:
# Preview the first rows of the 1-minute metadata.
minutely_metadata.head()

In [None]:
# Check that the 1-minute metadata lists the same symbols, in the same order,
# as the daily metadata, so the symbol selection made for the daily data can
# be reused. Comparing the minutely column with itself would be trivially True.
np.array_equal(
    daily_metadata["Symbol"].values, minutely_metadata["Symbol"].values
)

In [None]:
# Path template handed to the loader for the 1-minute files.
# NOTE(review): the `%s` is presumably substituted with each symbol — confirm in vendors.kibot.utils.
file_name = "/data/kibot/All_Futures_Continuous_Contracts_1min/%s.csv.gz"

# Load 1-minute data for the same symbol list that was used for the daily data.
minutely_price_dict_df = kut.read_multiple_symbol_data(
    comm_list, file_name, nrows=None
)

# Sanity check: last two rows loaded for the "CL" symbol.
minutely_price_dict_df["CL"].tail(2)

In [None]:
# First rows of the 1-minute data for the "CL" symbol.
minutely_price_dict_df["CL"].head()

In [None]:
# Aggregate every symbol's 1-minute table into 5-minute bins by summing each
# column within the bin.
# NOTE(review): summing is applied to all columns, including price columns such
# as "open"/"close"; the summed close minus the summed open equals the sum of the
# per-minute moves, but the columns themselves are no longer prices — confirm
# this is the intended aggregation.
five_min_price_dict_df = {
    symbol_name: minutely_prices.resample("5Min").sum()
    for symbol_name, minutely_prices in minutely_price_dict_df.items()
}

## Top movements for a symbol

In [None]:
# Symbol analysed in this section; also interpolated into the plot title and printout below.
symbol = "CL"

In [None]:
# Top movements of the selected symbol on the 5-minute data, via the library helper
# (the daily section spells out the equivalent steps by hand).
top_100_movements_cl_5_min = pma.get_top_movements_for_symbol(
    five_min_price_dict_df, symbol
)

In [None]:
# Preview the largest 5-minute movements.
# The result is used directly as the timestamp-indexed object by the plot and
# the time-of-day breakdown below, so it is previewed as-is; a `["CL"]` lookup
# on a timestamp index would raise a KeyError.
top_100_movements_cl_5_min.head()

In [None]:
# Bar chart of the largest 5-minute movements for the selected symbol.
# Title fixed: the duplicated word ("in in") is removed.
top_100_movements_cl_5_min.plot(kind="bar")
plt.title(
    f"Largest price movements in a 5 min interval (in z-score space) for {symbol} symbol"
)
plt.show()

In [None]:
# Time-of-day breakdown: how many of the top movements start at each clock time.
print(f"Top 100 of the price movements for {symbol} occur at the following time:")
# The index is wrapped in a Series to get the `.dt` accessor, then counted by time of day.
print(pd.Series(top_100_movements_cl_5_min.index).dt.time.value_counts())

## Largest movements for energy group

In [None]:
# Commodity group analysed in this section.
group = "Energy"

In [None]:
# Symbols of the selected group that are available in the kibot metadata.
commodity_symbols_kibot[group]

In [None]:
# Top movements of the selected group on the 5-minute data, via the same helper used for the daily data.
pma.get_top_movements_by_group(
    five_min_price_dict_df, commodity_symbols_kibot, group
)

## Largest movements for each group

In [None]:
# Top 5-minute movements of every commodity group; tqdm shows progress over
# the groups while the helper runs.
top_100_5_min_movements_by_group = {
    group_name: pma.get_top_movements_by_group(
        five_min_price_dict_df, commodity_symbols_kibot, group_name
    )
    for group_name in tqdm(commodity_symbols_kibot.keys())
}

In [None]:
# First entries of each group's top 5-minute movements.
{
    group_name: group_movements.head()
    for group_name, group_movements in top_100_5_min_movements_by_group.items()
}