## Description

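Exploratory notebook for sanity-checking Kibot metadata. It loads the continuous contract metadata both with the standalone reader function and with the `KibotMetadata` class, and records the data-quality issues found in each.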

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

import logging
import os

import pandas as pd
import pprint

import core.finance as fin
import core.statistics as stats
import helpers.dbg as dbg
import helpers.env as env
import helpers.printing as prnt
import vendors2.kibot.utils as kut

In [2]:
# Set up logging for this notebook at INFO verbosity.
dbg.init_logger(verbosity=logging.INFO)

# Module-level logger used by the rest of the notebook.
_LOG = logging.getLogger(__name__)

# Log the first element of the system signature. In the saved cell output this
# shows the package versions and the most recent commits, which records the
# environment the notebook was run in.
_LOG.info("%s", env.get_system_signature()[0])

# NOTE(review): presumably applies the project's notebook display settings --
# confirm in `helpers.printing`.
prnt.config_notebook()

# Packages
         python: 3.7.6
         joblib: 0.14.1
          numpy: 1.18.1
         pandas: 1.0.3
        pyarrow: 0.16.0
          scipy: 1.4.1
        seaborn: 0.10.0
        sklearn: 0.22.2.post1
    statsmodels: 0.11.1
# Last commits:
  *   f48f746f Paul     Merge pull request #2059 from ParticleDev/PartTask1958_Refactor_WIND_config_builders0 (  16 hours ago) Mon Apr 20 02:02:24 2020  (HEAD -> PartTask2070_Clean_up_Kibot0, origin/master, origin/HEAD, master)
  |\  
  | * e480838d paul     Checkpoint                                                        (  16 hours ago) Mon Apr 20 01:51:19 2020  (origin/PartTask1958_Refactor_WIND_config_builders0, PartTask1958_Refactor_WIND_config_builders0)
  | * 2cc04848 paul     Checkpoint                                                        (  17 hours ago) Mon Apr 20 01:10:51 2020           


## Load Kibot

### Continuous contract metadata

In [3]:
# Load the continuous contract metadata with the standalone function.
# NOTE(review): the saved output mentions AWS credentials, so this presumably
# reads from S3 -- confirm in `vendors2.kibot.utils`.
continuous_contract_metadata = kut.read_continuous_contract_metadata()

Found credentials in shared credentials file: ~/.aws/credentials


In [4]:
# Look at the data.
#
# There are problems:
#   - The last row is all NaNs
#   - The index is float instead of int (because the last row has a NaN value)
#   - The `StartDate` column is not a datetime column
#   - (harder) The symbols ("SymbolBase" seems to always equal "Symbol") do
#     not always match the CME GLOBEX symbol, even when the "Exchange"
#     description includes "GLOBEX"
continuous_contract_metadata

Unnamed: 0,SymbolBase,Symbol,StartDate,Size(MB),Description,Exchange
1.0,JY,JY,9/27/2009,183.0,CONTINUOUS JAPANESE YEN CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
2.0,TY,TY,9/27/2009,180.0,CONTINUOUS 10 YR US TREASURY NOTE CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
3.0,FV,FV,9/27/2009,171.0,CONTINUOUS 5 YR US TREASURY NOTE CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
4.0,ES,ES,9/27/2009,162.0,CONTINUOUS E-MINI S&P 500 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
5.0,EU,EU,9/27/2009,160.0,CONTINUOUS EURO FX CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
6.0,GC,GC,9/27/2009,156.0,CONTINUOUS GOLD CONTRACT,Commodities Exchange Center (COMEX GLOBEX)
7.0,US,US,9/27/2009,154.0,CONTINUOUS 30 YR US TREASURY BOND CONTRACT,Chicago Board Of Trade (CBOT GLOBEX)
8.0,AD,AD,9/27/2009,152.0,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,Chicago Mercantile Exchange (CME GLOBEX)
9.0,NQ,NQ,9/27/2009,150.0,CONTINUOUS E-MINI NASDAQ 100 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
10.0,CL,CL,9/27/2009,146.0,CONTINUOUS CRUDE OIL CONTRACT,New York Mercantile Exchange (NYMEX GLOBEX)


In [5]:
# Same exercise, but using the `KibotMetadata` class instead of the standalone
# function.
km = kut.KibotMetadata()

In [6]:
# Get the metadata from the `KibotMetadata` instance so it can be inspected in
# the next cell.
km_metadata = km.get_metadata()

In [7]:
# Potential data issues:
#   - km_metadata["Description"].str.contains("CONTINUOUS").value_counts()
#     shows 250 contracts with "CONTINUOUS" in their description, yet the
#     standalone function returns 87. Why?
#   - "StartDate" is not a datetime column
#   - "min_contract" and "max_contract" should also be datetime columns
#   - "num_contracts" and "num_expiries" should be "int"
#
# NOTE(review): values such as `11.202` and `5.201` in "min_contract" /
# "max_contract" suggest that "MM.YYYY" strings were parsed as floats, dropping
# the trailing zero of years like 2020 and 2010 -- TODO confirm.
km_metadata

Unnamed: 0,Description,StartDate,Exchange,num_contracts,min_contract,max_contract,num_expiries,expiries
AC,CONTINUOUS ETHANOL CONTRACT,9/28/2009,Chicago Board Of Trade (CBOT GLOBEX),122.0,9.2009,11.2019,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,9/27/2009,Chicago Mercantile Exchange (CME GLOBEX),65.0,11.2009,11.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AE,CONTINUOUS BLOOMBERG COMMODITY INDEX CONTRACT,,,38.0,5.201,8.2019,4.0,"[2, 5, 8, 11]"
AEX,CONTINUOUS AEX INDEX CONTRACT,,,116.0,3.201,2.202,12.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"
AJY,CONTINUOUS AUSTRALIAN $/JAPANESE YEN CONTRACT,10/21/2009,Chicago Mercantile Exchange (CME GLOBEX),41.0,11.2009,11.2019,4.0,"[2, 5, 8, 11]"
ALJ,CONTINUOUS FTSE/JSE TOP 40 INDEX CONTRACT,,,25.0,5.2014,5.202,4.0,"[2, 5, 8, 11]"
ALM,CONTINUOUS MINI FTSE/JSE TOP 40 INDEX CONTRACT,,,22.0,5.2014,8.2019,4.0,"[2, 5, 8, 11]"
BB,CONTINUOUS MINI JAPANESE GOVERNMENT BOND CONTRACT,,,38.0,5.201,8.2019,4.0,"[2, 5, 8, 11]"
BBN,CONTINUOUS NZ 90 DAY BANK ACCEPTED BILL CONTRACT,,,38.0,11.2011,2.2021,4.0,"[2, 5, 8, 11]"
BD,CONTINUOUS EURO BUND CONTRACT,,,40.0,5.201,2.202,4.0,"[2, 5, 8, 11]"
