# Description
- This notebook shows how to use the Kibot API

## Import

In [1]:
%load_ext autoreload
%autoreload 2
import logging

import pandas as pd
import seaborn as sns

import core.config as cfg
import core.explore as exp
import core.finance as fin
import core.signal_processing as sigp
import helpers.dbg as dbg
import helpers.env as env
import helpers.printing as pri
import vendors2.kibot.utils as kut

In [2]:
print(env.get_system_signature())

pri.config_notebook()

# dbg.init_logger(verbosity=logging.DEBUG)
dbg.init_logger(verbosity=logging.INFO)
# dbg.test_logger()

_LOG = logging.getLogger(__name__)

# Packages
         python: 3.7.3
         joblib: 0.14.0
          numpy: 1.17.3
         pandas: 0.25.2
        pyarrow: 0.15.0
          scipy: 1.3.1
        seaborn: 0.9.0
        sklearn: 0.21.3
    statsmodels: 0.10.1
# Last commits:
  * 2cc966f Julia    PartTask275: Add tests for ExpiryContractMapper                   (  19 hours ago) Wed Oct 30 15:19:59 2019  (HEAD -> PartTask275_PRICE_Organize_Kibot_metadata, origin/PartTask275_PRICE_Organize_Kibot_metadata)
  * b2c1be2 Julia    PartTask275: Use factored out functions, remove deps from the server (  21 hours ago) Wed Oct 30 13:43:39 2019           
  *   6029721 Julia    Merge remote-tracking branch 'origin/master' into PartTask275_PRICE_Organize_Kibot_metadata (  21 hours ago) Wed Oct 30 13:34:51 2019           
  |\  


# Metadata

## Read metadata

In [24]:
kmd = kut.KibotMetadata()
metadata = kmd.get_metadata()
metadata

Unnamed: 0,Description,StartDate,Exchange,num_contracts,min_contract,max_contract,num_expiries,expiries
AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,9/27/2009,Chicago Mercantile Exchange (CME GLOBEX),65.0,11.2009,11.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AEX,CONTINUOUS AEX INDEX CONTRACT,,,116.0,3.201,2.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
ALJ,CONTINUOUS FTSE/JSE TOP 40 INDEX CONTRACT,,,25.0,5.2014,5.202,4.0,"[2, 5, 8, 11]"
ALM,CONTINUOUS MINI FTSE/JSE TOP 40 INDEX CONTRACT,,,22.0,5.2014,8.2019,4.0,"[2, 5, 8, 11]"
BB,CONTINUOUS MINI JAPANESE GOVERNMENT BOND CONTRACT,,,38.0,5.201,8.2019,4.0,"[2, 5, 8, 11]"
BD,CONTINUOUS EURO BUND CONTRACT,,,40.0,5.201,2.202,4.0,"[2, 5, 8, 11]"
BL,CONTINUOUS EURO BOBL CONTRACT,,,40.0,5.201,2.202,4.0,"[2, 5, 8, 11]"
BO,CONTINUOUS SOYBEAN OIL CONTRACT,9/27/2009,Chicago Board Of Trade (CBOT GLOBEX),96.0,9.2009,8.2021,8.0,"[0, 2, 4, 6, 7, 8, 9, 11]"
BP,CONTINUOUS BRITISH POUND CONTRACT,9/27/2009,Chicago Mercantile Exchange (CME GLOBEX),65.0,11.2009,8.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
BTC,CONTINUOUS BITCOIN FUTURES CONTRACT,12/17/2017,Chicago Mercantile Exchange (CME GLOBEX),22.0,0.2018,11.2019,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"


In [26]:
metadata.sort_index()

Unnamed: 0,Description,StartDate,Exchange,num_contracts,min_contract,max_contract,num_expiries,expiries
AC,CONTINUOUS ETHANOL CONTRACT,9/28/2009,Chicago Board Of Trade (CBOT GLOBEX),122.0,9.2009,11.2019,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,9/27/2009,Chicago Mercantile Exchange (CME GLOBEX),65.0,11.2009,11.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AE,CONTINUOUS BLOOMBERG COMMODITY INDEX CONTRACT,,,38.0,5.201,8.2019,4.0,"[2, 5, 8, 11]"
AEX,CONTINUOUS AEX INDEX CONTRACT,,,116.0,3.201,2.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AJY,CONTINUOUS AUSTRALIAN $/JAPANESE YEN CONTRACT,10/21/2009,Chicago Mercantile Exchange (CME GLOBEX),41.0,11.2009,11.2019,4.0,"[2, 5, 8, 11]"
ALJ,CONTINUOUS FTSE/JSE TOP 40 INDEX CONTRACT,,,25.0,5.2014,5.202,4.0,"[2, 5, 8, 11]"
ALM,CONTINUOUS MINI FTSE/JSE TOP 40 INDEX CONTRACT,,,22.0,5.2014,8.2019,4.0,"[2, 5, 8, 11]"
BB,CONTINUOUS MINI JAPANESE GOVERNMENT BOND CONTRACT,,,38.0,5.201,8.2019,4.0,"[2, 5, 8, 11]"
BBN,CONTINUOUS NZ 90 DAY BANK ACCEPTED BILL CONTRACT,,,38.0,11.2011,2.2021,4.0,"[2, 5, 8, 11]"
BD,CONTINUOUS EURO BUND CONTRACT,,,40.0,5.201,2.202,4.0,"[2, 5, 8, 11]"


## Read misc metadata

In [3]:
df1 = kut.read_1min_contract_metadata()
df1.head(3)

Unnamed: 0_level_0,Symbol,Link,Description
All_Futures_Contracts_1min.csv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,JY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS JAPANESE YEN CONTRACT
2,JYF18,http://api.kibot.com/?action=download&link=vrv...,JAPANESE YEN JANUARY 2018
3,JYF19,http://api.kibot.com/?action=download&link=8r8...,JAPANESE YEN JANUARY 2019


In [4]:
df2 = kut.read_daily_contract_metadata()
df2.head(3)

Unnamed: 0_level_0,Symbol,Link,Description
All_Futures_Contracts_daily.csv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,JY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS JAPANESE YEN CONTRACT
2,JYF18,http://api.kibot.com/?action=download&link=vrv...,JAPANESE YEN JANUARY 2018
3,JYF19,http://api.kibot.com/?action=download&link=8r8...,JAPANESE YEN JANUARY 2019


In [5]:
df3 = kut.read_tickbidask_contract_metadata()
df3.head(3)

Unnamed: 0,SymbolBase,Symbol,StartDate,Size(MB),Description,Exchange
1.0,ES,ES,9/30/2009,50610.0,CONTINUOUS E-MINI S&P 500 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
2.0,ES,ESH11,4/6/2010,891.0,E-MINI S&P 500 MARCH 2011,Chicago Mercantile Exchange Mini Sized Contrac...
3.0,ES,ESH12,3/6/2011,1060.0,E-MINI S&P 500 MARCH 2012,Chicago Mercantile Exchange Mini Sized Contrac...


In [6]:
df4 = kut.read_continuous_contract_metadata()
print(df4.head(3))

print(df4["Exchange"].unique())

    SymbolBase Symbol  StartDate  Size(MB)                                 Description                                  Exchange
1.0         JY     JY  9/27/2009     183.0            CONTINUOUS JAPANESE YEN CONTRACT  Chicago Mercantile Exchange (CME GLOBEX)
2.0         TY     TY  9/27/2009     180.0  CONTINUOUS 10 YR US TREASURY NOTE CONTRACT      Chicago Board Of Trade (CBOT GLOBEX)
3.0         FV     FV  9/27/2009     171.0   CONTINUOUS 5 YR US TREASURY NOTE CONTRACT      Chicago Board Of Trade (CBOT GLOBEX)
['Chicago Mercantile Exchange (CME GLOBEX)'
 'Chicago Board Of Trade (CBOT GLOBEX)'
 'Chicago Mercantile Exchange Mini Sized Contracts (CME MINI)'
 'Commodities Exchange Center (COMEX GLOBEX)'
 'New York Mercantile Exchange (NYMEX GLOBEX)'
 'Chicago Board Of Trade Mini Sized Contracts (CBOT MINI)'
 'New York Mercantile Exchange Mini Sized Contracts'
 'CBOE Futures Exchange (CFE)' nan]


In [22]:
df4.dropna(how="all")

Unnamed: 0,SymbolBase,Symbol,StartDate,Size(MB),Description,Exchange
1.0,JY,JY,9/27/2009,183.0,CONTINUOUS JAPANESE YEN CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
2.0,TY,TY,9/27/2009,180.0,CONTINUOUS 10 YR US TREASURY NOTE CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
3.0,FV,FV,9/27/2009,171.0,CONTINUOUS 5 YR US TREASURY NOTE CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
4.0,ES,ES,9/27/2009,162.0,CONTINUOUS E-MINI S&P 500 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
5.0,EU,EU,9/27/2009,160.0,CONTINUOUS EURO FX CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
6.0,GC,GC,9/27/2009,156.0,CONTINUOUS GOLD CONTRACT,Commodities Exchange Center (COMEX GLOBEX)
7.0,US,US,9/27/2009,154.0,CONTINUOUS 30 YR US TREASURY BOND CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
8.0,AD,AD,9/27/2009,152.0,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
9.0,NQ,NQ,9/27/2009,150.0,CONTINUOUS E-MINI NASDAQ 100 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
10.0,CL,CL,9/27/2009,146.0,CONTINUOUS CRUDE OIL CONTRACT,New York Mercantile Exchange (NYMEX GLOBEX)


## Explore metadata

In [18]:
mask = ["GAS" in d or "OIL" in d for d in df4["Description"].values]
print(sum(mask))
print(df4[mask].drop(["SymbolBase", "Size(MB)"], axis=1))

TypeError: argument of type 'float' is not iterable

# Price data

## Read continuous daily prices for single futures

In [None]:
s = "CL"
# nrows = None
nrows = 10000
df = kut.read_data("D", "continuous", s, nrows=nrows)
df.head(3)

## Read continuous 1-min prices for single futures

In [None]:
s = "CL"
# nrows = None
nrows = 10000
df = kut.read_data("T", "continuous", s, nrows=nrows)
df.head(3)

In [None]:
## Read continuous 1-min prices for multiple futures

## Read continuous daily prices for multiple futures

In [None]:
symbols = tuple("CL NG RB BZ".split())
nrows = 10000

daily_price_dict_df = kut.read_data("D", "continuous", symbols, nrows=nrows)

daily_price_dict_df["CL"].head(3)

## Read continuous 1-min prices for multiple futures

In [None]:
symbols = tuple("CL NG RB BZ".split())
nrows = 10000

daily_price_dict_df = kut.read_data("D", "continuous", symbols, nrows=nrows)

daily_price_dict_df["CL"].head(3)

## Read data through config API

In [None]:
config = cfg.Config.from_env()

if config is None:
    config = cfg.Config()
    config_tmp = config.add_subconfig("read_data")
    # Use the data from S3.
    file_name = hs3.get_path() + "/kibot/All_Futures_Contracts_1min/ES.csv.gz"
    config_tmp["file_name"] = file_name
    config_tmp["nrows"] = 100000

_LOG.info(config)

In [None]:
def read_data_from_config(config):
    _LOG.info("Reading data ...")
    config.check_params(["file_name"])
    return kut._read_data(config["file_name"], config.get("nrows", None))


df = read_data_from_config(config["read_data"])

_LOG.info("df.shape=%s", df.shape)
_LOG.info("datetimes=[%s, %s]", df.index[0], df.index[-1])
_LOG.info("df=\n%s", df.head(3))

## Read raw data directly from S3

In [None]:
s = "CL"
file_name = (
    hs3.get_path() + "/kibot/All_Futures_Continuous_Contracts_1min/%s.csv.gz" % s
)
nrows = 10000

df = pd.read_csv(file_name, header=None, parse_dates=[0], nrows=nrows)
# df.columns = "datetime open high low close vol".split()
df.head(3)

# Return computation

## 1-min for single futures

In [None]:
# TODO(gp)

## 1-min for multiple futures

In [None]:
# Read multiple futures.
symbols = tuple("CL NG RB BZ".split())
nrows = 100000
min_price_dict_df = kut.read_data(
    "T", "continuous", symbols, ext="csv", nrows=nrows
)
_LOG.info("keys=%s", min_price_dict_df.keys())
min_price_dict_df["CL"].tail(3)

### Compute returns ret_0

In [None]:
def compute_ret_0_from_multiple_1min_prices(price_dict_df, mode):
    dbg.dassert_isinstance(price_dict_df, dict)
    rets = []
    for s, price_df in price_dict_df.items():
        _LOG.debug("Processing s=%s", s)
        rets_tmp = fin.compute_ret_0(price_df["open"], mode)
        rets_tmp = pd.DataFrame(rets_tmp)
        rets_tmp.columns = ["%s_ret_0" % s]
        rets.append(rets_tmp)
    rets = pd.concat(rets, sort=True, axis=1)
    return rets


mode = "pct_change"
min_rets = compute_ret_0_from_multiple_1min_prices(min_price_dict_df, mode)


min_rets.head(3)

In [None]:
sigp.resample(min_rets.fillna(0.0), rule="1D").sum().cumsum().plot()

### Resample to 1min

In [None]:
# Resample to 1min.
_LOG.info("## Before resampling")
exp.report_zero_nan_inf_stats(min_rets)

In [None]:
exp.plot_non_na_cols(sigp.resample(min_rets, rule="1D").sum())

In [None]:
min_rets = fin.resample_1min(min_rets, skip_weekends=False)

_LOG.info("## After resampling")
exp.report_zero_nan_inf_stats(min_rets)

min_rets.fillna(0.0, inplace=True)

### z-scoring

In [None]:
zscore_com = 28
min_zrets = fin.zscore(
    min_rets, com=zscore_com, demean=False, standardize=True, delay=1
)
min_zrets.columns = [c.replace("ret_", "zret_") for c in min_zrets.columns]
min_zrets.dropna().head(3)

In [None]:
sigp.resample(min_zrets.fillna(0.0), rule="1D").sum().cumsum().plot()

In [None]:
annot = True
stocks_corr = min_rets.dropna().corr()

sns.clustermap(stocks_corr, annot=annot)

## Daily for single futures

## Daily for multiple futures