## Import

In [1]:
%load_ext autoreload
%autoreload 2
import datetime
import logging
import os
import platform

import numpy as np
import pandas as pd
import seaborn as sns
import scipy
import matplotlib
import matplotlib.pyplot as plt
import sklearn

import helpers.config as cfg
import helpers.dbg as dbg
import helpers.printing as printing
import core.explore as exp
import core.finance as fin

import vendors.kibot.utils as kut

In [2]:
# Print the environment signature.
# NOTE(review): the recorded output of this cell lists the Python and library
# versions, presumably produced by this call.
print(cfg.get_system_signature())

# Presumably applies the project's notebook display settings; see
# `helpers.printing` to confirm.
printing.config_notebook()

# TODO(gp): Changing level during the notebook execution doesn't work. Fix it.
# INFO is the active verbosity; the DEBUG alternative is kept commented out.
#dbg.init_logger(verb=logging.DEBUG)
dbg.init_logger(verb=logging.INFO)
#dbg.test_logger()

# Logger used for messages emitted from this notebook.
_LOG = logging.getLogger(__name__)

python=3.7.3
numpy=1.17.1
pandas=0.25.1
seaborn=0.9.0
scipy=1.3.1
matplotlib=3.1.1
sklearn=0.21.3
effective level= 20 (INFO)


# Metadata

In [3]:
# First Kibot metadata table; preview the leading rows.
df1 = kut.read_metadata1()
df1.head(n=3)

Unnamed: 0,Symbol,Link,Description
1,JY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS JAPANESE YEN CONTRACT
2,JYF18,http://api.kibot.com/?action=download&link=vrv...,JAPANESE YEN JANUARY 2018
3,JYF19,http://api.kibot.com/?action=download&link=8r8...,JAPANESE YEN JANUARY 2019


In [4]:
# Second Kibot metadata table; preview the leading rows.
df2 = kut.read_metadata2()
df2.head(n=3)

Unnamed: 0,Symbol,Link,Description
1,JY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS JAPANESE YEN CONTRACT
2,JYF18,http://api.kibot.com/?action=download&link=vrv...,JAPANESE YEN JANUARY 2018
3,JYF19,http://api.kibot.com/?action=download&link=8r8...,JAPANESE YEN JANUARY 2019


In [5]:
# Third Kibot metadata table (per-contract start date, size and exchange);
# preview the leading rows.
df3 = kut.read_metadata3()
df3.head(n=3)

Unnamed: 0,SymbolBase,Symbol,StartDate,Size(MB),Description,Exchange
1,ES,ES,9/30/2009,50610.0,CONTINUOUS E-MINI S&P 500 CONTRACT,Chicago Mercantile Exchange Mini Sized Contrac...
2,ES,ESH11,4/6/2010,891.0,E-MINI S&P 500 MARCH 2011,Chicago Mercantile Exchange Mini Sized Contrac...
3,ES,ESH12,3/6/2011,1060.0,E-MINI S&P 500 MARCH 2012,Chicago Mercantile Exchange Mini Sized Contrac...


In [6]:
# Fourth Kibot metadata table: the one used by the rest of the notebook.
df4 = kut.read_metadata4()
# Plain-text preview of the leading rows (print() keeps the long text columns
# untruncated in the recorded output).
print(df4.head(n=3))

# Distinct exchanges appearing in the metadata.
print(pd.unique(df4["Exchange"]))

  SymbolBase Symbol  StartDate  Size(MB)                                 Description                                  Exchange
1         JY     JY  9/27/2009     183.0            CONTINUOUS JAPANESE YEN CONTRACT  Chicago Mercantile Exchange (CME GLOBEX)
2         TY     TY  9/27/2009     180.0  CONTINUOUS 10 YR US TREASURY NOTE CONTRACT      Chicago Board Of Trade (CBOT GLOBEX)
3         FV     FV  9/27/2009     171.0   CONTINUOUS 5 YR US TREASURY NOTE CONTRACT      Chicago Board Of Trade (CBOT GLOBEX)
['Chicago Mercantile Exchange (CME GLOBEX)'
 'Chicago Board Of Trade (CBOT GLOBEX)'
 'Chicago Mercantile Exchange Mini Sized Contracts (CME MINI)'
 'Commodities Exchange Center (COMEX GLOBEX)'
 'New York Mercantile Exchange (NYMEX GLOBEX)'
 'Chicago Board Of Trade Mini Sized Contracts (CBOT MINI)'
 'New York Mercantile Exchange Mini Sized Contracts'
 'CBOE Futures Exchange (CFE)']


## Explore metadata

In [7]:
# Flag the contracts whose description mentions GAS or OIL (case-sensitive).
# `.str.contains(..., na=False)` treats a missing description as "no match",
# whereas the element-wise `"GAS" in d` raises TypeError on a NaN value.
# The result is converted to a plain list of bools, so `mask` keeps the type
# the rest of the notebook already works with.
mask = df4["Description"].str.contains("GAS|OIL", na=False).tolist()
# Number of matching contracts.
print(sum(mask))
# Matching rows without the columns that are irrelevant for this view.
print(df4[mask].drop(["SymbolBase", "Size(MB)"], axis=1))

7
   Symbol  StartDate                                   Description                                           Exchange
10     CL  9/27/2009                 CONTINUOUS CRUDE OIL CONTRACT        New York Mercantile Exchange (NYMEX GLOBEX)
20     NG  9/27/2009               CONTINUOUS NATURAL GAS CONTRACT        New York Mercantile Exchange (NYMEX GLOBEX)
24     QM  9/27/2009          CONTINUOUS E-MINI CRUDE OIL CONTRACT  New York Mercantile Exchange Mini Sized Contracts
29     RB  9/27/2009             CONTINUOUS RBOB GASOLINE CONTRACT        New York Mercantile Exchange (NYMEX GLOBEX)
35     BO  9/27/2009               CONTINUOUS SOYBEAN OIL CONTRACT               Chicago Board Of Trade (CBOT GLOBEX)
43     BZ  9/28/2009  CONTINUOUS BRENT CRUDE OIL LAST DAY CONTRACT        New York Mercantile Exchange (NYMEX GLOBEX)
49     QG  9/27/2009        CONTINUOUS E-MINI NATURAL GAS CONTRACT  New York Mercantile Exchange Mini Sized Contracts


In [8]:
# Symbols of the rows selected by `mask`, as a NumPy array.
df4.loc[mask, 'Symbol'].values

array(['CL', 'NG', 'QM', 'RB', 'BO', 'BZ', 'QG'], dtype=object)

# Configuration

In [9]:
import collections

# Notebook parameters: taken from the `__CONFIG__` environment variable when it
# is set, otherwise from the defaults in the `else` branch.
config = collections.OrderedDict()

if "__CONFIG__" in os.environ:
    # Keep the raw string under its own name so that `config` never holds
    # anything but the configuration object.
    config_as_str = os.environ["__CONFIG__"]
    print("__CONFIG__=", config_as_str)
    # SECURITY(review): eval() executes whatever expression the environment
    # variable contains; run this notebook only with a trusted `__CONFIG__`.
    config = eval(config_as_str)
else:
    # Rows to read per file; None is forwarded as-is to the reader. The smaller
    # value below is kept as a commented-out alternative.
    #config["nrows"] = 100000
    config["nrows"] = None
    #
    config["zscore_com"] = 28

print(cfg.config_to_string(config))

nrows: None
zscore_com: 28


# Prices

## Read daily prices

In [10]:
# Symbols for which a daily continuous-contract file exists.
# - Only entries ending in `.csv.gz` are considered, so stray files in the
#   directory do not show up as symbols.
# - Only the trailing suffix is stripped (`str.replace` would also remove the
#   substring from the middle of a name).
# - The result is sorted because `os.listdir` returns entries in arbitrary
#   order.
all_symbols = sorted(
    entry[:-len('.csv.gz')]
    for entry in os.listdir('/data/kibot/All_Futures_Continuous_Contracts_daily')
    if entry.endswith('.csv.gz')
)

In [11]:
# Symbols of the contracts selected by `mask`; these are the ones loaded below.
symbols = df4.loc[mask, 'Symbol'].values
symbols

array(['CL', 'NG', 'QM', 'RB', 'BO', 'BZ', 'QG'], dtype=object)

In [12]:
# Path template for the daily files; `%s` is the placeholder for the symbol.
file_name = "/data/kibot/All_Futures_Continuous_Contracts_daily/%s.csv.gz"

# Load the daily data of every selected symbol; the result is indexed by
# symbol.
daily_price_dict_df = kut.read_multiple_symbol_data(
    symbols,
    file_name,
    nrows=config["nrows"],
)

# Quick look at the most recent rows of one symbol.
daily_price_dict_df["CL"].tail(n=2)

Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/CL.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/NG.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/QM.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/RB.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/BO.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/BZ.csv.gz' nrows=None
Reading file_name='/data/kibot/All_Futures_Continuous_Contracts_daily/QG.csv.gz' nrows=None


Unnamed: 0_level_0,open,high,low,close,vol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-07-18,56.72,57.45,54.85,55.42,764491
2019-07-19,55.84,56.49,55.12,55.76,710948


# Top futures by volume

## Sum volume

In [13]:
# Total of the `vol` column for each loaded symbol.
daily_volume_sum_dict = {
    sym: daily_price_dict_df[sym]['vol'].sum()
    for sym in daily_price_dict_df
}

In [14]:
# One row per symbol, a single `sum_vol` column, index labelled `symbol`.
daily_volume_sum_df = pd.DataFrame.from_dict(
    daily_volume_sum_dict,
    orient='index',
    columns=['sum_vol'],
).rename_axis('symbol')

In [15]:
# Rank the symbols from the largest to the smallest total volume.
daily_volume_sum_df.sort_values(by='sum_vol', ascending=False)

Unnamed: 0_level_0,sum_vol
symbol,Unnamed: 1_level_1
CL,1406297147
NG,462417257
BO,171761967
RB,145838317
QM,56211899
BZ,38764685
QG,9643541


## Mean volume

In [16]:
# Mean of the `vol` column for each loaded symbol.
daily_volume_mean_dict = {
    sym: daily_price_dict_df[sym]['vol'].mean()
    for sym in daily_price_dict_df
}

In [17]:
# One row per symbol, a single `mean_vol` column, index labelled `symbol`.
daily_volume_mean_df = pd.DataFrame.from_dict(
    daily_volume_mean_dict,
    orient='index',
    columns=['mean_vol'],
).rename_axis('symbol')

In [18]:
# Rank the symbols from the largest to the smallest mean volume.
daily_volume_mean_df.sort_values(by='mean_vol', ascending=False)

Unnamed: 0_level_0,mean_vol
symbol,Unnamed: 1_level_1
CL,246892.055302
NG,78415.67865
RB,42382.539087
BO,28872.409985
QM,15245.972064
BZ,12831.73949
QG,2616.971777
