
# Imports

Importing all required modules.

In [24]:
%load_ext autoreload
%autoreload 2

import datetime as dt
import os

import pandas as pd

import helpers.s3 as hs3
import vendors2.kibot.data.load as kdl
import vendors2.kibot.data.load.file_path_generator as fpgen
import vendors2.kibot.data.types as types

# Placeholder value: this will change once Exchange is developed.
EXCHANGE = "TestExchange"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Define helper functions to calculate the report.

In [25]:
def slice_price_data(
    prices: pd.DataFrame, last_years: int
) -> pd.DataFrame:
    """Keep only the rows that fall within the last `last_years` years.

    The window is anchored on the current local time and spans
    `[now - last_years calendar years, now]`. Label-based slicing with
    `.loc` includes both end points.

    :param prices: price dataframe for one symbol. It is sliced by index
        label, so the index is assumed to be a sorted, timezone-naive
        datetime index (NOTE(review): confirm against the data loader).
    :param last_years: length of the look-back window, in calendar years.
    :return: the rows of `prices` whose index lies inside the window.
    """
    now = pd.Timestamp.now()
    # Use a calendar-aware offset instead of `365 * last_years` days: the
    # fixed-day approximation drifts by one day for every leap day that
    # falls inside the window.
    before = now - pd.DateOffset(years=last_years)
    return prices.loc[before:now]


def get_start_date(prices_df: pd.DataFrame) -> str:
    """Return the first index entry of a price dataframe as a date string.

    :param prices_df: dataframe with prices; its index entries must
        support `strftime`
    :return: the first index value formatted as `YYYY-MM-DD`
    """
    first_timestamp = prices_df.index[0]
    return first_timestamp.strftime("%Y-%m-%d")


def get_price_data(
    prices_df: pd.DataFrame,
    price_col: str,
    agg_func: str,
) -> float:
    """Aggregate one column of a price dataframe.

    :param prices_df: dataframe with prices
    :param price_col: name of the column to aggregate
    :param agg_func: name of the no-argument method to call on the
        selected column, e.g. `"mean"`
    :return: the value returned by that method
    """
    column = prices_df[price_col]
    # Look the aggregation up by name on the column, then call it.
    aggregate = getattr(column, agg_func)
    return aggregate()

Define the main function that generates the report for a dataset.

In [26]:
def generate_report(
    exchange: str,
    asset_class: types.AssetClass,
    contract_type: types.ContractType,
    frequency: types.Frequency,
    n_rows: int = 10,
) -> pd.DataFrame:
    """Generate a volume report for the first symbols of a dataset.

    For each symbol the report holds the first date in its data and the
    mean of the `vol` column over the last 1, 3 and 5 years.

    :param exchange: exchange name, passed through to the data loader
    :param asset_class: asset class of the dataset, e.g.
        `types.AssetClass.Futures`
    :param contract_type: `types.ContractType.Continuous` or
        `types.ContractType.Expiry`
    :param frequency: `types.Frequency.Daily` or
        `types.Frequency.Minutely`
    :param n_rows: number of entries from the dataset directory listing
        to include in the report
    :return: a dataframe indexed by `symbol` with the columns
        `start_date`, `mean_1y_vol`, `mean_3y_vol` and `mean_5y_vol`;
        missing values are replaced with 0
    """
    # Only the directory of the generated path is used, so the symbol
    # argument is left empty.
    dataset_aws_path = fpgen.FilePathGenerator().generate_file_path(
        "", frequency, asset_class, contract_type, ext=types.Extension.CSV
    )
    dataset_aws_directory = os.path.dirname(dataset_aws_path)
    # Get a list of payloads (symbols) in format XYZ.csv.gz.
    payloads = hs3.listdir(dataset_aws_directory, mode="non-recursive")
    # Keep only the first `n_rows` payloads and strip the extension to get
    # the symbol names.
    symbols = tuple(
        payload.replace(".csv.gz", "") for payload in payloads[:n_rows]
    )
    # Look-back windows, in years, for which the mean volume is reported.
    windows_in_years = (1, 3, 5)
    kibot_data_loader = kdl.S3KibotDataLoader()
    report_data = []
    for symbol in symbols:
        df = kibot_data_loader.read_data(
            exchange, symbol, asset_class, frequency, contract_type
        )
        # Mean volume over each look-back window.
        mean_vols = tuple(
            get_price_data(
                slice_price_data(df, last_years=years), "vol", "mean"
            )
            for years in windows_in_years
        )
        # Get start date for each symbol.
        start_date = get_start_date(df)
        report_data.append((symbol, start_date) + mean_vols)
    columns = ["symbol", "start_date"] + [
        "mean_{}y_vol".format(years) for years in windows_in_years
    ]
    report = pd.DataFrame.from_records(
        report_data,
        index="symbol",
        columns=columns,
    )
    # NOTE(review): presumably the missing values come from windows that
    # contain no rows; they are reported as 0, which is indistinguishable
    # from a genuine zero mean volume.
    return report.fillna(0)

Report for all_futures_contracts_1min

In [27]:
# Build the report for futures expiry contracts at minutely frequency.
dataset_report = generate_report(
    EXCHANGE,
    types.AssetClass.Futures,
    types.ContractType.Expiry,
    types.Frequency.Minutely
)
# Display the report as the cell output.
dataset_report

Unnamed: 0_level_0,start_date,mean_1y_vol,mean_3y_vol,mean_5y_vol
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,2009-09-28,0.0,3.685601,4.585687
ACF10,2009-01-05,0.0,0.0,0.0
ACF11,2009-10-02,0.0,0.0,0.0
ACF12,2010-05-04,0.0,0.0,0.0
ACF13,2011-07-27,0.0,0.0,0.0
ACF14,2012-11-27,0.0,0.0,0.0
ACF15,2014-02-05,0.0,0.0,0.0
ACF16,2015-04-21,0.0,0.0,0.0
ACF17,2016-06-08,0.0,0.0,6.778603
ACF18,2017-07-20,0.0,0.0,4.630482


Report for all_futures_contracts_daily

In [28]:
# Build the report for futures expiry contracts at daily frequency.
dataset_report = generate_report(
    EXCHANGE,
    types.AssetClass.Futures,
    types.ContractType.Expiry,
    types.Frequency.Daily
)
# Display the report as the cell output.
dataset_report

Unnamed: 0_level_0,start_date,mean_1y_vol,mean_3y_vol,mean_5y_vol
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,2005-05-05,0.0,188.381963,214.191126
ACF10,2008-01-14,0.0,0.0,0.0
ACF11,2008-02-25,0.0,0.0,0.0
ACF12,2009-01-07,0.0,0.0,0.0
ACF13,2010-01-08,0.0,0.0,0.0
ACF14,2011-01-06,0.0,0.0,0.0
ACF15,2012-01-05,0.0,0.0,0.0
ACF16,2013-01-04,0.0,0.0,0.0
ACF17,2014-01-06,0.0,0.0,76.443983
ACF18,2015-01-06,0.0,0.0,20.979675


Report for all_futures_continuous_contracts_1min

In [29]:
# Build the report for futures continuous contracts at minutely frequency.
dataset_report = generate_report(
    EXCHANGE,
    types.AssetClass.Futures,
    types.ContractType.Continuous,
    types.Frequency.Minutely
)
# Display the report as the cell output.
dataset_report

Unnamed: 0_level_0,start_date,mean_1y_vol,mean_3y_vol,mean_5y_vol
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,2009-09-28,0.0,3.685601,4.585687
AD,2009-09-27,0.0,74.652497,71.047113
AE,2007-05-16,0.0,126.350903,115.620952
AEX,2009-09-28,0.0,35.686105,37.444023
AJY,2009-10-21,0.0,5.635328,5.240059
ALJ,2014-03-31,0.0,31.641741,35.119711
ALM,2014-03-31,0.0,4.682867,5.02836
BB,2009-09-27,0.0,6.539458,7.235888
BBN,2011-09-25,0.0,122.062252,118.091746
BD,2009-09-28,0.0,698.576599,718.721924


Report for all_futures_continuous_contracts_daily

In [30]:
# Build the report for futures continuous contracts at daily frequency.
dataset_report = generate_report(
    EXCHANGE,
    types.AssetClass.Futures,
    types.ContractType.Continuous,
    types.Frequency.Daily
)
# Display the report as the cell output.
dataset_report

Unnamed: 0_level_0,start_date,mean_1y_vol,mean_3y_vol,mean_5y_vol
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,2005-05-05,0.0,188.381963,214.191126
AD,1995-09-13,0.0,103035.758621,97491.960182
AE,2006-10-02,0.0,2514.700265,2389.955631
AEX,2005-12-30,0.0,29927.687664,30722.434978
AJY,2002-05-31,0.0,164.429708,144.825939
ALJ,2013-12-20,0.0,24640.65508,27314.750577
ALM,2013-12-20,0.0,1440.331476,1614.313749
BB,2005-09-09,0.0,912.790698,1050.305987
BBN,2011-10-13,0.0,2304.184697,1842.367531
BD,1996-09-06,0.0,714788.340369,698020.773034
