In [1]:
import warnings
warnings.filterwarnings('ignore')

___
# Merge csv files for ETF shares, CME Open Interest, and CFTC COT reports.
___

### Usage:
Run all the cells in this workbook after you have run all of the "build" notebooks in this project

### Main Graphs:
The graphs that follow the header ```COT vs Market Graphs``` show the graph of the latest COT (new format) for the main physical commodities vs a graph of the settlement prices for those commodities, or for an ETF that closely tracks those commodities (think Comex GC vs the ETF GLD).


In [2]:
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import pathlib
import sys
import os
abs_folders = [os.path.abspath(d) for d in ['./','../']]
for af in abs_folders:
    if  not af in sys.path:
        sys.path.append(af)
from cme_open_interest import db_info

import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)

import zipfile
import urllib.request
from PIL import Image
import jupyter_utilities as ju
import importlib
import pandasql as psql
import traceback
import pdb

# Make important folders
# TEMP_FOLDER holds scratch output; SAVE_IMAGE_FOLDER is a sub-folder of it.
TEMP_FOLDER = './temp_folder'
SAVE_IMAGE_FOLDER = f'{TEMP_FOLDER}/gold'
# os.makedirs(..., exist_ok=True) already tolerates folders that exist, so the
# bare `except: pass` that used to hide *every* error is no longer needed.
# Any other OSError (e.g. a permissions problem) is reported instead of being
# silently swallowed, but it still does not stop the notebook.
for _folder in (TEMP_FOLDER, SAVE_IMAGE_FOLDER):
    try:
        os.makedirs(_folder, exist_ok=True)
    except OSError as _err:
        print(f'could not create folder {_folder}: {_err}')

def to_int(s,print_full_exception=False):
    """Convert a value to ``int`` by way of its string and float forms.

    Args:
        s: any value; it is turned into text with ``str`` and parsed as a
            float, then truncated with ``int``.
        print_full_exception: when True, the full traceback of a failed
            conversion is printed in addition to the one-line message.

    Returns:
        The truncated integer, or ``None`` when the value cannot be converted
        (a message naming the offending value is printed in that case).
    """
    try:
        as_float = float(str(s))
        converted = int(as_float)
    except Exception:
        print(f'to_int exception on value:{s}')
        if print_full_exception:
            traceback.print_exc()
        converted = None
    return converted

pd.set_option('display.max_colwidth',1000)
if os.path.abspath('../')  not in sys.path:
    if '.' not in sys.path:
        sys.path.append(os.path.abspath('../'))
import barchart_api as bcapi
import importlib
import json
from dashapp import dashapp2 as dashapp

cme_csv_save_folder = './cme_oi_data'
cot_data_path = './cot_net_new_history.csv'
etf_data_path = './etf_cap_hist.csv'

In [3]:
log = dashapp.logging.getLogger(__name__)
log.setLevel(dashapp.logging.INFO)

In [4]:
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'

pga = db_info.get_db_info()
print(f"futtab max date: {pga.get_sql(f'select max(settle_date) from {futtab}')}")
print(f"opttab max date: {pga.get_sql(f'select max(settle_date) from {opttab}')}")


  sec_db
futtab max date:         max
0  20200612
opttab max date:         max
0  20200612


### Define commodity and etf identifiers in the csv files

In [5]:
OI_ID_GOLD = 'GOLD FUTURES'
OI_ID_SILVER = 'SILVER FUTURES'
OI_ID_CL = 'CRUDE OIL LIGHT SWEET FUTURES'
OI_ID_NG = 'NATURAL GAS FUTURES'
OI_ID_10Y = '10Y NOTE FUTURE'
OI_ID_SPY = 'E-MINI S&P 500 FUTURE'
OI_ID_SOYB = 'SOYBEAN FUTURE'
OI_ID_SOYO = 'SOYBEAN OIL FUTURE'
OI_ID_CORN = 'CORN FUTURE'
OI_ID_WHEAT = 'CHICAGO SRW WHEAT FUTURE'
OI_ID_COTTON = 'COTTON FUTURES'
OI_ID_EURO = 'EURO FX FUTURE'
OI_ID_ED = 'EURODOLLAR FUTURE'
OI_ID_HG = 'HIGH GRADE COPPER FUTURES'
OI_ID_PL = 'PLATINUM FUTURES'
OI_ID_SUGAR = 'SUGAR 11 FUTURES'
OI_ID_COCOA = 'COCOA FUTURES'

COT_ID_GOLD= 'GOLD - COMMODITY EXCHANGE INC.'
COT_ID_SILVER= 'SILVER - COMMODITY EXCHANGE INC.'
# COT_ID_CL = 'CRUDE OIL, LIGHT SWEET'
COT_ID_CL = 'CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE'
COT_ID_NG = 'NATURAL GAS - NEW YORK MERCANTILE EXCHANGE'
COT_ID_10Y = '10-YEAR U.S. TREASURY NOTES - CHICAGO BOARD OF TRADE'
COT_ID_SPY = 'E-MINI S&P 500 STOCK INDEX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_SOYB = 'SOYBEANS - CHICAGO BOARD OF TRADE'
COT_ID_SOYO = 'SOYBEAN OIL - CHICAGO BOARD OF TRADE'
# COT_ID_WHEAT = 'WHEAT - CHICAGO BOARD OF TRADE'
COT_ID_COTTON = 'COTTON NO. 2 - ICE FUTURES U.S.'
COT_ID_WHEAT = 'WHEAT-SRW - CHICAGO BOARD OF TRADE'
COT_ID_CORN = 'CORN - CHICAGO BOARD OF TRADE'
COT_ID_EURO = 'EURO FX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_ED = '3-MONTH EURODOLLARS - CHICAGO MERCANTILE EXCHANGE'
COT_ID_HG = 'COPPER-GRADE #1 - COMMODITY EXCHANGE INC.'
COT_ID_PL = 'PLATINUM - NEW YORK MERCANTILE EXCHANGE'
COT_ID_SUGAR = 'SUGAR NO. 11 - ICE FUTURES U.S.'
COT_ID_COCOA = 'COCOA - ICE FUTURES U.S.'

ETF_ID_GOLD = 'GCZ99'
ETF_ID_SILVER = 'SIZ99'
ETF_ID_CL = 'CLZ99'
ETF_ID_NG = 'NGZ99'
ETF_ID_10Y = 'AGG'
ETF_ID_SPY = 'SPY'
ETF_ID_COTTON = 'BAL'
ETF_ID_SOYB = 'ZSZ99'#'ZSY00'
ETF_ID_SOYO = 'ZLZ99'#'ZLY00'
ETF_ID_WHEAT = 'ZWZ99'#'ZWY00'#'WEAT'
ETF_ID_CORN = 'ZCZ99'#'CORN'
ETF_ID_EURO = 'FXE'
ETF_ID_ED = 'BSV'
ETF_ID_HG = 'CPER'
ETF_ID_PL = 'PPLT'
ETF_ID_SUGAR = 'SGG'
ETF_ID_COCOA = 'NIB'

ETF_SHARES_DIVISOR_GOLD = 1000
ETF_SHARES_DIVISOR_SILVER = 5000
ETF_SHARES_DIVISOR_10Y = 1000
ETF_SHARES_DIVISOR_SPY = 500
ETF_SHARES_DIVISOR_COTTON = 50000
ETF_SHARES_DIVISOR_SOYB = 5000
ETF_SHARES_DIVISOR_SOYO = 1
ETF_SHARES_DIVISOR_WHEAT = 5000
ETF_SHARES_DIVISOR_CORN = 5000
ETF_SHARES_DIVISOR_EURO = 1000
ETF_SHARES_DIVISOR_CL = 4000
ETF_SHARES_DIVISOR_NG = 1250
ETF_SHARES_DIVISOR_ED = 1000000
ETF_SHARES_DIVISOR_HG = 2500
ETF_SHARES_DIVISOR_PL = 500
ETF_SHARES_DIVISOR_SUGAR = int((112000/100)/3)
ETF_SHARES_DIVISOR_COCOA = 100


ID_DICT = {
    'gold':{'OI':OI_ID_GOLD,'COT':COT_ID_GOLD,'ETF':ETF_ID_GOLD,'ETF_DIVISOR':ETF_SHARES_DIVISOR_GOLD},
    'silver':{'OI':OI_ID_SILVER,'COT':COT_ID_SILVER,'ETF':ETF_ID_SILVER,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SILVER},
#     '10Y':{'OI':OI_ID_10Y,'COT':COT_ID_10Y,'ETF':ETF_ID_10Y,'ETF_DIVISOR':ETF_SHARES_DIVISOR_10Y},
#     'spy':{'OI':OI_ID_SPY,'COT':COT_ID_SPY,'ETF':ETF_ID_SPY,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SPY},
    'cotton':{'OI':OI_ID_COTTON,'COT':COT_ID_COTTON,'ETF':ETF_ID_COTTON,'ETF_DIVISOR':ETF_SHARES_DIVISOR_COTTON},
    'soyb':{'OI':OI_ID_SOYB,'COT':COT_ID_SOYB,'ETF':ETF_ID_SOYB,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SOYB},
    'soyo':{'OI':OI_ID_SOYO,'COT':COT_ID_SOYO,'ETF':ETF_ID_SOYO,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SOYO},
    'wheat':{'OI':OI_ID_WHEAT,'COT':COT_ID_WHEAT,'ETF':ETF_ID_WHEAT,'ETF_DIVISOR':ETF_SHARES_DIVISOR_WHEAT},
    'corn':{'OI':OI_ID_CORN,'COT':COT_ID_CORN,'ETF':ETF_ID_CORN,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CORN},
#     'euro':{'OI':OI_ID_EURO,'COT':COT_ID_EURO,'ETF':ETF_ID_EURO,'ETF_DIVISOR':ETF_SHARES_DIVISOR_EURO},
    'cl':{'OI':OI_ID_CL,'COT':COT_ID_CL,'ETF':ETF_ID_CL,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CL},
    'ng':{'OI':OI_ID_NG,'COT':COT_ID_NG,'ETF':ETF_ID_NG,'ETF_DIVISOR':ETF_SHARES_DIVISOR_NG},
#     'ed':{'OI':OI_ID_ED,'COT':COT_ID_ED,'ETF':ETF_ID_ED,'ETF_DIVISOR':ETF_SHARES_DIVISOR_ED},
    'hg':{'OI':OI_ID_HG,'COT':COT_ID_HG,'ETF':ETF_ID_HG,'ETF_DIVISOR':ETF_SHARES_DIVISOR_HG},
    'pl':{'OI':OI_ID_PL,'COT':COT_ID_PL,'ETF':ETF_ID_PL,'ETF_DIVISOR':ETF_SHARES_DIVISOR_PL},
    'sugar':{'OI':OI_ID_SUGAR,'COT':COT_ID_SUGAR,'ETF':ETF_ID_SUGAR,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SUGAR},
    'cocoa':{'OI':OI_ID_COCOA,'COT':COT_ID_COCOA,'ETF':ETF_ID_COCOA,'ETF_DIVISOR':ETF_SHARES_DIVISOR_COCOA},
}


# YEAR_OFFSET is subtracted from the current year when the yearly CME open-interest
# csv files are chosen for loading.  It is 1 only while "now" is on or before
# 2020-01-10 and 0 at any later time.
# NOTE(review): presumably a one-off guard for the first days of 2020, before that
# year's csv existed; with today's clock it always evaluates to 0 — confirm whether
# it is still needed.
YEAR_OFFSET = 0 if datetime.datetime.now() > datetime.datetime(2020,1,10) else 1

In [6]:
ID_DICT['wheat']['ETF']

'ZWZ99'

___
## Define helper access routines
___

In [7]:
# COT helpers
def df_cot_by_name(dict_id='cl',df_cot=None):
    """Return the COT rows that belong to one commodity.

    Args:
        dict_id: key into ``ID_DICT``; its ``'COT'`` entry is the market name
            to look for.
        df_cot: frame to search; when ``None`` the notebook-level ``df_cot2``
            is used.

    Returns:
        The rows whose ``Market_and_Exchange_Names`` equals the commodity's
        COT identifier.
    """
    if df_cot is None:
        source_frame = df_cot2
    else:
        source_frame = df_cot
    wanted_name = ID_DICT[dict_id]['COT']
    is_wanted = source_frame.Market_and_Exchange_Names == wanted_name
    return source_frame[is_wanted]


___
### Get cme open interest, COT and ETF data from csv files
___

In [8]:
import traceback
# --- CME open interest: one csv per year, 2013 through the last available year ---
last_year = int(datetime.datetime.now().year)
# An inclusive integer range yields every year exactly once.  The previous
# np.linspace(2013, last_year-YEAR_OFFSET, last_year-2013+1, dtype=int) asked for
# one point too many whenever YEAR_OFFSET was 1; the fractional steps were then
# truncated to int, repeating some years and so loading their rows twice.
years = np.arange(2013, last_year - YEAR_OFFSET + 1, dtype=int)
yearly_frames = []
for y in years:
    df_temp = pd.read_csv(f'{cme_csv_save_folder}/cme_open_interest_{y}.csv')
    # rows without an open-interest value are dropped per file
    yearly_frames.append(df_temp[~df_temp.Open_Interest.isnull()])
# Concatenate once instead of growing the frame inside the loop:
# DataFrame.append copied the whole frame each time and was removed in pandas 2.0.
df_oi = pd.concat(yearly_frames, ignore_index=True)
df_oi = df_oi[~df_oi.Total_Volume.isnull()]
# drop rows whose Open_Interest text contains a 'T'
# NOTE(review): presumably these are total/summary lines in the CME report — confirm
# .copy() so the column assignments below write to an independent frame
df_oi = df_oi[~df_oi.Open_Interest.astype(str).str.contains('T')].copy()
df_oi['ExPit_Volume'] = df_oi.ExPit_Volume.fillna(0)
# to_int returns None (and prints a message) for values it cannot convert
df_oi['Open_Interest'] = df_oi.Open_Interest.apply(to_int)
df_oi['Total_Volume'] = df_oi.Total_Volume.apply(to_int)
print(f'oi length:{len(df_oi)}')

# --- ETF history ---
df_etf = pd.read_csv(etf_data_path)
df_etf['trade_date'] = df_etf.date.apply(ju.str_to_yyyymmdd)
print(f'etf length:{len(df_etf)}')

# --- CFTC COT history ---
df_cot2 = pd.read_csv(cot_data_path)
df_cot2['As_of_Date_In_Form_YYMMDD'] = df_cot2.As_of_Date_In_Form_YYMMDD.apply(ju.str_to_date)
# market names are stripped so that they compare equal to the COT_ID_* constants
df_cot2['Market_and_Exchange_Names'] = df_cot2.Market_and_Exchange_Names.str.strip()
print(f'cot length:{len(df_cot2)}')


oi length:1616419
etf length:23183
cot length:92377


### For some of the ETFs, get the data from Yahoo and ignore the shares data

In [9]:
def fetch_history(symbol,dt_beg,dt_end):
    """Download a price history from Yahoo and shape it like the ETF csv rows.

    Args:
        symbol: ticker passed to ``pdr.DataReader``.
        dt_beg: start of the requested period.
        dt_end: end of the requested period.

    Returns:
        A frame sorted by date, indexed 0..n-1, with the columns
        ``symbol``, ``date`` ('YYYY-MM-DD' text), ``nav`` (the adjusted close),
        ``shares`` (always 0) and ``trade_date`` (integer yyyymmdd).
    """
    quotes = pdr.DataReader(symbol, 'yahoo', dt_beg, dt_end)
    # the reader's index carries the quote dates; render each as 'YYYY-MM-DD'
    iso_dates = [f"{str(d)[0:4]}-{str(d)[5:7]}-{str(d)[8:10]}" for d in quotes.index]
    quotes['date'] = iso_dates
    quotes['trade_date'] = [int(s[0:4] + s[5:7] + s[8:10]) for s in iso_dates]
    quotes = quotes.sort_values('date')
    quotes.index = list(range(len(quotes)))
    # make adj close the close; share counts are not available from this source
    quotes = quotes.assign(nav=quotes['Adj Close'], symbol=symbol, shares=0)
    output_columns = ['symbol','date','nav','shares','trade_date']
    return quotes[output_columns]

    

In [10]:
# Smoke test of fetch_history: download one ETF and show its last and first rows.
# NOTE(review): despite its name, df_soyb holds the history for ETF_ID_SUGAR, not a
# soybean product — looks like a leftover variable name; confirm before renaming.
df_soyb = fetch_history(ETF_ID_SUGAR,datetime.datetime(2015,1,1),datetime.datetime(2020,12,31))
# print(df_etf.tail())
print(df_soyb.tail())
print(df_soyb.head())
# df_etf = df_etf.append(df_soyb,ignore_index=True)

2020-06-15 17:13:53,448 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443
2020-06-15 17:13:53,903 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/SGG/history?period1=1420102800&period2=1609491599&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


    symbol        date        nav  shares  trade_date
596    SGG  2020-06-09  38.754002       0    20200609
597    SGG  2020-06-10  39.462002       0    20200610
598    SGG  2020-06-11  38.016998       0    20200611
599    SGG  2020-06-12  38.230000       0    20200612
600    SGG  2020-06-15  38.414101       0    20200615
  symbol        date        nav  shares  trade_date
0    SGG  2018-01-25  49.790001       0    20180125
1    SGG  2018-01-26  49.790001       0    20180126
2    SGG  2018-01-29  49.790001       0    20180129
3    SGG  2018-01-30  49.790001       0    20180130
4    SGG  2018-01-31  49.630001       0    20180131


In [11]:
def get_nasdaq_commod(commod='ZL'):
    """Read a commodity history csv from nasdaq.com and shape it like the ETF rows.

    The first csv column is read as an 'MM/DD/YYYY' date and the second as the
    price; any further columns are ignored.

    Args:
        commod: commodity code inserted into the nasdaq.com history url.

    Returns:
        A frame sorted by date with the columns ``symbol``, ``date``
        ('YYYY-MM-DD' text), ``nav``, ``shares`` (always 0) and ``trade_date``
        (integer yyyymmdd).

    NOTE(review): the ``symbol`` column is always 'SOYO', whatever ``commod``
    is — looks like a leftover from the soybean-oil case; confirm.
    """
    url = f'https://www.nasdaq.com/api/v1/historical/{commod}/commodities/2015-11-15/2022-12-15'
    raw_values = pd.read_csv(url).values
    # reorder 'MM/DD/YYYY' into 'YYYY-MM-DD' so that text order is date order
    iso_dates = ['-'.join((s[6:10], s[0:2], s[3:5])) for s in raw_values[:,0]]
    history = pd.DataFrame({'date':iso_dates,'nav':raw_values[:,1],'shares':0})
    history['symbol'] = 'SOYO'
    history = history.sort_values('date')
    history['trade_date'] = [int(d.replace('-','')) for d in history['date']]
    output_columns = ['symbol','date','nav','shares','trade_date']
    return history[output_columns]

In [12]:
ETF_ID_SOYO,ETF_ID_SOYO[-3:]

('ZLZ99', 'Z99')

In [13]:
def get_barchart_commod(commod='ZL',month='Z',year=99):
    """Load the settlement history of one futures symbol from the database.

    The symbol looked up in ``futtab`` is ``commod + month + year``
    (``'ZLZ99'`` with the defaults).

    Args:
        commod: commodity code, the first part of the symbol.
        month: month code, the middle part of the symbol.
        year: year part of the symbol.

    Returns:
        A frame with the columns ``symbol``, ``date`` ('YYYY-MM-DD' text built
        from ``settle_date``), ``nav`` (the ``close`` column), ``shares``
        (always 0) and ``trade_date`` (the ``settle_date`` column).
    """
    sql = f"""
    with 
    f1 as (
        select * from {futtab} where symbol = '{commod}{month}{year}'
    )
    select * from f1
    """
    history = pga.get_sql(sql)
    # settle_date is read as yyyymmdd; slice its text form into 'YYYY-MM-DD'
    settle_as_text = history.settle_date.apply(str)
    history['date'] = settle_as_text.apply(lambda v: '-'.join((v[0:4], v[4:6], v[6:8])))
    history['nav'] = history.close
    history['trade_date'] = history.settle_date
    history['shares'] = 0

    output_columns = ['symbol','date','nav','shares','trade_date']
    return history[output_columns]

In [14]:
df_wheat = get_barchart_commod('CL')
df_wheat.head()

Unnamed: 0,symbol,date,nav,shares,trade_date
0,CLZ99,2010-09-01,73.91,0,20100901
1,CLZ99,2010-09-02,75.02,0,20100902
2,CLZ99,2010-09-03,74.6,0,20100903
3,CLZ99,2010-09-07,74.09,0,20100907
4,CLZ99,2010-09-09,74.25,0,20100909


In [15]:
# get_nasdaq_commod(ETF_ID_SOYO[:2])

### Find identifier strings for specific Open Interest and COT rows in their respective DataFrames
1. Enter values for oi_key_word, cot_key_word and etf_key_word below
2. Choose the product/market_and_exchange_name/symbol that has the highest open_interest or volume

In [16]:
import pdb
def create_merged_df(commod_to_use):
    """Build a daily price frame for one commodity with its COT figures attached.

    Each price row is joined to the COT report whose date is on or before the
    row's ``trade_date`` and whose following report date is after it, so every
    trading day carries the figures of the most recent report.

    Price source, chosen from the last three characters of the ETF identifier:
      * ``'Y00'`` -> ``get_nasdaq_commod``
      * ``'Z99'`` -> ``get_barchart_commod``
      * anything else -> ``fetch_history`` from the first COT date until today

    Args:
        commod_to_use: key into ``ID_DICT`` (e.g. ``'cl'``).

    Returns:
        A frame sorted by ``trade_date`` with the columns ``trade_date``,
        ``nav`` and the COT ``*_net`` / ``*_ratio`` columns.

    Raises:
        ValueError: when the OI, COT or price identifier does not resolve to
            exactly one product / market / symbol.
    """
    # step 1 set up ID's
    ids = ID_DICT[commod_to_use]
    OI_ID = ids['OI']
    COT_ID = ids['COT']
    ETF_ID = ids['ETF']

    # Step 2: make sure ID's produce only one contract
    oi_key_word = OI_ID.lower()
    cot_key_word = COT_ID.lower()
    # names are compared case-insensitively; counting the matching unique names
    # gives the same number as grouping the matching rows, without the groupby
    oi_names = [s for s in df_oi.Product_Description.unique() if oi_key_word == str(s).lower()]
    cot_names = [s for s in df_cot2.Market_and_Exchange_Names.unique() if cot_key_word == str(s).lower()]

    df_cot_single = df_cot2[df_cot2.Market_and_Exchange_Names==COT_ID]
    dtmin = str(df_cot_single.As_of_Date_In_Form_YYMMDD.min())[0:10]
    dtmax = str(datetime.datetime.now())[0:10]

    etf_suffix = ETF_ID[-3:]
    if etf_suffix == 'Y00':
        print(f'using get_nasdaq_commod for {ETF_ID}')
        df_etf_single = get_nasdaq_commod(ETF_ID[:2])
    elif etf_suffix == 'Z99':
        print(f'using get_barchart_commod for {ETF_ID}')
        df_etf_single = get_barchart_commod(ETF_ID[:2])
    else:
        df_etf_single = fetch_history(ETF_ID,dtmin,dtmax)

    # Each identifier must resolve to exactly one entry.  Checking them one by
    # one closes a hole in the earlier "the three counts add up to 3" test,
    # which also accepted combinations such as 0 + 1 + 2.
    oi_ok = len(oi_names) == 1
    cot_ok = len(cot_names) == 1
    etf_ok = df_etf_single.symbol.nunique() == 1
    if oi_ok and cot_ok and etf_ok:
        print(f"all ID's for commodity: {commod_to_use} are OK")
    else:
        print(f"!!!!! ALL ID's FOR COMMODITY: {commod_to_use} ARE NOT OK!!!!!!")
        print(f'oi is OK: {oi_ok}')
        print(f'cot is OK: {cot_ok}')
        print(f'etf is OK: {etf_ok}')
        raise ValueError(f'ambiguous ID name {OI_ID}')

    # Step 3: merge cot and price data.
    # (The open-interest rows were previously filtered and converted here but
    # never used in the result; that dead work could only warn or raise, so it
    # has been dropped.)
    df_commod_net = df_cot_single.copy()
    df_commod_net['cot_yyyymmdd'] = df_commod_net.As_of_Date_In_Form_YYMMDD.apply(ju.str_to_yyyymmdd)
    df_commod_net = df_commod_net.sort_values('cot_yyyymmdd')
    df_commod_net.index = list(range(len(df_commod_net)))

    # every report is valid until the next report's date; the latest report
    # gets an end date 7 days after its own date
    last_date = ju.str_to_date(str(df_commod_net.iloc[-1].cot_yyyymmdd),sep='') + datetime.timedelta(7)
    last_date_yyyymmdd = ju.str_to_yyyymmdd(last_date)
    df_commod_net['next_cot_yyyymmdd'] = list(df_commod_net[1:].cot_yyyymmdd) + [last_date_yyyymmdd]

    q = "select * from df_etf_oi inner join df_commod_net on df_etf_oi.trade_date >= df_commod_net.cot_yyyymmdd and df_etf_oi.trade_date < df_commod_net.next_cot_yyyymmdd"
    # hand pandasql exactly the two tables the query names, not every local
    sql_tables = {'df_etf_oi': df_etf_single, 'df_commod_net': df_commod_net}
    df_etf_oi_cot = psql.sqldf(q, sql_tables)

    # step 4: create final,
    cot_cols = ['prod_net','monman_net','swap_net','other_net','nonrep_net','prod_ratio','monman_ratio','swap_ratio','other_ratio']
    df_final = df_etf_oi_cot[['trade_date','nav']+cot_cols]
    # "select *" over a join can repeat column names; keep the first of each
    df_final = df_final.loc[:,~df_final.columns.duplicated()]
    df_final = df_final.sort_values('trade_date')
    return df_final


In [17]:
# import pdb
# def create_merged_df2(commod_to_use):
#     global df_oi,df_etf,df_cot2
#     # step 1 set up ID's
#     COT_ID = ID_DICT[commod_to_use]['COT']
#     ETF_ID = ID_DICT[commod_to_use]['ETF']
#     ETF_DIVISOR = ID_DICT[commod_to_use]['ETF_DIVISOR']
#     #Step 2: make sure ID's produce only one contract
#     cot_key_word = COT_ID.lower()
#     etf_key_word = ETF_ID.lower()

#     l = (list(filter(lambda s: cot_key_word == str(s).lower(),df_cot2.Market_and_Exchange_Names.unique())))
#     df_cot_single = df_cot2[df_cot2.Market_and_Exchange_Names==COT_ID]
#     df_cot_sub = df_cot2[df_cot2.Market_and_Exchange_Names.isin(l)][['Market_and_Exchange_Names','Open_Interest_All']]
#     df_cot_gb = df_cot_sub.groupby('Market_and_Exchange_Names',as_index=False).sum()

#     l = (list(filter(lambda s: etf_key_word == str(s).lower(),df_etf.symbol.unique())))
#     dtmin = str(df_cot_single.As_of_Date_In_Form_YYMMDD.min())[0:10]
#     dtmax = str(datetime.datetime.now())[0:10]

#     #pdb.set_trace()
#     if ETF_ID[-3:] == 'Y00':
#         print(f'using get_nasdaq_commod for {ETF_ID}')
#         df_etf_single = get_nasdaq_commod(ETF_ID[:2])
#     elif ETF_ID[-3:] == 'Z99':
#         print(f'using get_barchart_commod for {ETF_ID}')
#         df_etf_single = get_barchart_commod(ETF_ID[:2])
#     else:
#         df_etf_single = fetch_history(ETF_ID,dtmin,dtmax)
#     df_etf_gb = df_etf_single.groupby('symbol',as_index=False).sum()
    

# #     if len(df_oi_gb) + len(df_cot_gb) + len(df_etf_gb) == 3:
#     if len(df_cot_gb) + len(df_etf_gb) == 2:
#         print(f"all ID's for commodity: {commod_to_use} are OK")
#     else:
#         print(f"!!!!! ALL ID's FOR COMMODITY: {commod_to_use} ARE NOT OK!!!!!!")
#         print(f'oi is OK: {len(df_oi_gb)==1}')
#         print(f'cot is OK: {len(df_cot_gb)==1}')
#         print(f'etf is OK: {len(df_etf_gb)==1}')
#         raise ValueError(f'ambiguous ID name {OI_ID}')
    
    
#     df_commod_net = df_cot_single.copy()

#     df_commod_net['cot_yyyymmdd'] = df_commod_net.As_of_Date_In_Form_YYMMDD.apply(ju.str_to_yyyymmdd)
#     df_commod_net = df_commod_net.sort_values('cot_yyyymmdd')
#     df_commod_net.index = list(range(len(df_commod_net)))

#     last_date = ju.str_to_date(str(df_commod_net.iloc[-1].cot_yyyymmdd),sep='') + datetime.timedelta(7)
#     last_date_yyyymmdd = ju.str_to_yyyymmdd(last_date)
#     df_commod_net['next_cot_yyyymmdd'] = list(df_commod_net[1:].cot_yyyymmdd) + [last_date_yyyymmdd]

#     df_etf_oi = df_etf_single.copy()
#     q = f"select * from df_etf_oi inner join df_commod_net on df_etf_oi.trade_date >= df_commod_net.cot_yyyymmdd and df_etf_oi.trade_date < df_commod_net.next_cot_yyyymmdd"
#     df_etf_oi_cot =  psql.sqldf(q, locals())

#     # step 4: create final,
#     cot_cols = ['prod_net','monman_net','swap_net','other_net','nonrep_net','prod_ratio','monman_ratio','swap_ratio','other_ratio']
#     df_final = df_etf_oi_cot[['trade_date','nav']+cot_cols]#[-1000:]
#     df_final = df_final.loc[:,~df_final.columns.duplicated()]
#     df_final = df_final.sort_values('trade_date')
#     return df_final


In [18]:
# COT_ID = ID_DICT['cl']['COT']
# print(len(df_cot2[df_cot2.Market_and_Exchange_Names==COT_ID]))
# cols = ['As_of_Date_In_Form_YYMMDD','monman_net']
# [
#     df_cot2.As_of_Date_In_Form_YYMMDD.min(),
#     df_oi.trade_date.min()
# ]

In [19]:
create_merged_df('cl')


using get_barchart_commod for CLZ99
all ID's for commodity: cl are OK


Unnamed: 0,trade_date,nav,prod_net,monman_net,swap_net,other_net,nonrep_net,prod_ratio,monman_ratio,swap_ratio,other_ratio
0,20100901,73.91,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
1,20100902,75.02,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
2,20100903,74.60,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
3,20100907,74.09,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.038990,0.554270
4,20100909,74.25,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.038990,0.554270
...,...,...,...,...,...,...,...,...,...,...,...
2451,20200608,38.24,-170091.0,394131.0,-429923.0,174199.0,31684.0,0.701738,9.928505,0.257031,2.492721
2452,20200609,38.92,-177091.0,397549.0,-431472.0,170360.0,40654.0,0.691594,9.434800,0.262244,2.445039
2453,20200610,39.54,-177091.0,397549.0,-431472.0,170360.0,40654.0,0.691594,9.434800,0.262244,2.445039
2454,20200611,36.41,-177091.0,397549.0,-431472.0,170360.0,40654.0,0.691594,9.434800,0.262244,2.445039


### Create the merged data for all commodities (input for the multi-plots)

In [49]:
import traceback
# Build one merged price/COT frame per commodity in ID_DICT and keep them in
# dict_df, keyed by the ID_DICT key.
dict_df = {}
# number of most recent rows kept per commodity (rows, i.e. trading days in the
# merged frame, not calendar days)
last_n_days = 365*10
for k in ID_DICT.keys():
    print(f'processing {k}')
    try:
        df = create_merged_df(k)
        dict_df[k] = df.iloc[-last_n_days:]
    except Exception as e:
        # best effort: a commodity that fails is reported and left out of dict_df
        print(f'EXCEPTION: {str(e)}')
#         print(traceback.print_exc())

processing gold
using get_barchart_commod for GCZ99
all ID's for commodity: gold are OK
processing silver
using get_barchart_commod for SIZ99
all ID's for commodity: silver are OK


2020-06-15 17:30:44,460 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443


processing cotton


2020-06-15 17:30:44,901 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/BAL/history?period1=1262682000&period2=1592294399&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


all ID's for commodity: cotton are OK
processing soyb
using get_barchart_commod for ZSZ99
all ID's for commodity: soyb are OK
processing soyo
using get_barchart_commod for ZLZ99
all ID's for commodity: soyo are OK
processing wheat
using get_barchart_commod for ZWZ99
all ID's for commodity: wheat are OK
processing corn
using get_barchart_commod for ZCZ99
all ID's for commodity: corn are OK
processing cl
using get_barchart_commod for CLZ99
all ID's for commodity: cl are OK
processing ng
using get_barchart_commod for NGZ99
all ID's for commodity: ng are OK


2020-06-15 17:30:49,827 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443


processing hg


2020-06-15 17:30:50,327 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/CPER/history?period1=1262682000&period2=1592294399&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


all ID's for commodity: hg are OK
processing pl


2020-06-15 17:30:50,900 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443
2020-06-15 17:30:51,453 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/PPLT/history?period1=1262682000&period2=1592294399&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


all ID's for commodity: pl are OK


2020-06-15 17:30:52,190 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443


processing sugar


2020-06-15 17:30:52,576 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/SGG/history?period1=1262682000&period2=1592294399&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


all ID's for commodity: sugar are OK
processing cocoa


2020-06-15 17:30:53,061 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): finance.yahoo.com:443
2020-06-15 17:30:53,398 - urllib3.connectionpool - DEBUG - https://finance.yahoo.com:443 "GET /quote/NIB/history?period1=1262682000&period2=1592294399&interval=1d&frequency=1d&filter=history HTTP/1.1" 200 None


all ID's for commodity: cocoa are OK


___
### Plot each df in dict_df using plotly
___

In [50]:
ddd = dict_df['cl']
net_cols = [c for c in ddd.columns.values if '_net' in c]
f = ju.plotly_plot(ddd[['trade_date','nav']+net_cols],x_column='trade_date',bar_plot=False,yaxis2_cols=['nav'])
iplot(f)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




In [51]:
f = ju.plotly_plot(ddd[['trade_date','nav','monman_net','other_net']],x_column='trade_date',bar_plot=False,yaxis2_cols=['nav'])
iplot(f)

## COT vs Market Graphs
Run the cell below to create a graph for each commodity which displays both the COT history for that commodity and the price history for that commodity.

In [52]:
dict_df['gold'].tail(15)
dict_df.keys()

dict_keys(['gold', 'silver', 'cotton', 'soyb', 'soyo', 'wheat', 'corn', 'cl', 'ng', 'hg', 'pl', 'sugar', 'cocoa'])

In [53]:
dict_df['cl'].head(30)

Unnamed: 0,trade_date,nav,prod_net,monman_net,swap_net,other_net,nonrep_net,prod_ratio,monman_ratio,swap_ratio,other_ratio
0,20100901,73.91,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
1,20100902,75.02,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
2,20100903,74.6,-199158.0,71495.0,172980.0,-58375.0,13058.0,0.405168,1.608795,2.196472,0.593724
3,20100907,74.09,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.03899,0.55427
4,20100909,74.25,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.03899,0.55427
5,20100910,76.45,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.03899,0.55427
6,20100913,77.19,-186602.0,77545.0,161163.0,-67056.0,14950.0,0.502213,1.707314,2.03899,0.55427
7,20100914,76.8,-186614.0,113760.0,133777.0,-65423.0,4500.0,0.500702,2.241068,1.828977,0.554831
8,20100915,76.02,-186614.0,113760.0,133777.0,-65423.0,4500.0,0.500702,2.241068,1.828977,0.554831
9,20100916,74.57,-186614.0,113760.0,133777.0,-65423.0,4500.0,0.500702,2.241068,1.828977,0.554831


In [54]:
log.setLevel(dashapp.logging.INFO)
# column plotted against nav in every chart, and the label of the nav axis
y_left_label = 'y nav'
y_right_label = 'monman_net'

# Plot nav against y_right_label for every commodity, and collect the frames
# (tagged with their commodity key) so they can be saved as one csv.
frames_with_symbol = []
for k in dict_df.keys():
    df_in = dict_df[k]
    # only frames with more than one row go into the combined csv
    if len(df_in)>1:
        df_in_with_commod = df_in.assign(symbol=k)
        frames_with_symbol.append(df_in_with_commod)
    last_date = df_in.trade_date.max()
    xc = 'trade_date'
    plot_title = f'{k} last trade date {last_date}'
    try:
        fig = ju.plotly_pandas(df_in[['trade_date', 'nav', y_right_label]],x_column=xc,plot_title=plot_title,y_left_label=y_left_label,y_right_label=y_right_label)
        fig['layout'].hovermode='x'
        iplot(fig)
    except Exception as e:
        # a chart that fails must not stop the remaining commodities
        print(f'{k} {str(e)}')

# Concatenate once at the end: DataFrame.append inside the loop copied the
# growing frame on every pass and was removed in pandas 2.0.
if frames_with_symbol:
    df_all_in = pd.concat(frames_with_symbol, ignore_index=True)
    df_all_in.to_csv(f'{TEMP_FOLDER}/df_all_in.csv',index=False)
else:
    # previously this case failed with an AttributeError on None.to_csv
    df_all_in = None
    print('no commodity frame with more than one row; df_all_in.csv not written')


In [26]:
# this is the old "non plotly" plotting routine
# ju.multi_df_plot(dict_df=dict_df,x_column='trade_date',num_of_x_ticks=40,save_file_prefix='cot_nav_plot',save_image_folder='./temp_folder/saved_images')    


___
### Try various strategies based on above charts
___

In [27]:
dict_df['cl'].columns.values

array(['trade_date', 'nav', 'prod_net', 'monman_net', 'swap_net',
       'other_net', 'nonrep_net', 'prod_ratio', 'monman_ratio',
       'swap_ratio', 'other_ratio'], dtype=object)

In [28]:
# For every commodity, chart the price together with a "transition" marker:
# +1 / -1 on rows where field_to_chart2 changes sign versus the previous row
# (+1 when it rose, -1 otherwise), and 0 on all other rows.
field_to_chart = 'nav'
field_to_chart2 = 'monman_net'
for sym in dict_df.keys():
    df_all_in = dict_df[sym].copy()
    net_now = df_all_in[field_to_chart2]
    net_prev = net_now.shift(1)
    df_all_in[f'{field_to_chart2}_prev'] = net_prev
    # Vectorized forms of the former row-wise apply(axis=1) lambdas: same values,
    # but faster and well defined for an empty frame (apply(axis=1) on an empty
    # frame does not return a column-shaped result).
    # A negative ratio means the two values have opposite signs; the first row
    # has no predecessor, compares as NaN and therefore yields 0.
    df_all_in['is_transition'] = ((net_now / net_prev) < 0).astype(int)
    df_all_in['trans_sign'] = np.where((net_now - net_prev) > 0, 1, -1)
    df_all_in['transition'] = df_all_in.is_transition * df_all_in.trans_sign
    df_all_in2 = df_all_in[['trade_date',field_to_chart,'transition']]
    fig = ju.plotly_pandas(df_all_in2[['trade_date',field_to_chart,'transition']],x_column='trade_date',plot_title=sym)
    iplot(fig)
# len(df_all_in[df_all_in.tran_count==1])/len(df_all_in)

2020-06-15 17:14:21,390 - numexpr.utils - INFO - NumExpr defaulting to 4 threads.


___
### The cells below help you find commodities in df_cot2 and df_oi
___

In [29]:
df_oi[df_oi.Product_Description=='SUGAR 11 FUTURES'].tail()

Unnamed: 0,CME__Globex__Volume,Commodity_Indicator,Description,ExPit_Volume,Exchange_Name,Future_Option_Indicator,MTD_ADV,None,OTC_Volume,Open_Interest,Pit_Volume,Product_Description,Total_Volume,trade_date
1166509,0.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,0.0,,0,0,0,SUGAR 11 FUTURES,0,20180702
1385247,7.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,3.5,,0,3,0,SUGAR 11 FUTURES,7,20190604
1386177,2.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,3.0,,0,1,0,SUGAR 11 FUTURES,2,20190605
1387114,1.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,2.5,,0,0,0,SUGAR 11 FUTURES,1,20190606
1388082,0.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,2.0,,0,0,0,SUGAR 11 FUTURES,0,20190607


In [30]:
list(filter(lambda s: str(s)!='nan' and 'COCOA' in s and 'FUTURE' in s,list(set(df_oi.Product_Description))))

['COCOA FUTURES']

In [31]:
list(filter(lambda s: str(s)!='nan' and 'COCOA' in s ,list(set(df_cot2.Market_and_Exchange_Names))))

['COCOA - ICE FUTURES U.S.']

In [32]:
print(df_cot2[df_cot2.Market_and_Exchange_Names=='COTTON NO. 2 - NEW YORK COTTON EXCHANGE'].Open_Interest_All.sum())
print(df_cot2[df_cot2.Market_and_Exchange_Names=='COTTON NO. 2 - ICE FUTURES U.S.'].Open_Interest_All.sum())


0
110323110


In [33]:
list(filter(lambda s: 'Comm' in s,df_cot2.columns.values))

[]

In [34]:
df_cot2_cl = df_cot2[df_cot2.Market_and_Exchange_Names=='CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE']
df_cot2_cl = df_cot2_cl.sort_values('As_of_Date_In_Form_YYMMDD')
df_cot2_cl.tail()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All,Prod_Merc_Positions_Short_All,...,swap_net,swap_ratio,monman_net,monman_ratio,other_net,other_ratio,nonrep_net,nonrep_ratio,totrep_net,totrep_ratio
15256,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-05-12,2248020,491543,134875,405071,304486,102997,2145023,600425,...,-449089.0,0.230965,351685.0,7.587589,189334.0,2.644209,16952.0,1.197013,-16952.0,0.992159
15257,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-05-19,2158749,425213,139073,401853,290164,103753,2054996,546228,...,-453940.0,0.234519,360183.0,9.643701,183420.0,2.718317,31352.0,1.433033,-31352.0,0.984973
15258,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-05-26,2135678,408856,141849,420655,280579,101226,2034452,547988,...,-432008.0,0.247185,374810.0,9.175592,167764.0,2.487072,28566.0,1.393146,-28566.0,0.986153
15259,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-06-02,2127664,400183,148732,438274,290898,103180,2024484,570274,...,-429923.0,0.257031,394131.0,9.928505,174199.0,2.492721,31684.0,1.443158,-31684.0,0.984591
15260,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-06-09,2126629,397123,153372,444681,288253,104697,2021932,574214,...,-431472.0,0.262244,397549.0,9.4348,170360.0,2.445039,40654.0,1.634792,-40654.0,0.98029


## END

In [35]:
comod = 'hg'
fig = ju.plotly_pandas(dict_df[comod][['trade_date', 'monman_net', 'monman_ratio']],x_column='trade_date',plot_title=comod)
iplot(fig)


In [36]:
def get_cot2(id):
    """Return a reduced-column slice of the module-level ``df_cot2`` for one market.

    The market is looked up as ``ID_DICT[id]['COT']`` and matched against the
    ``Market_and_Exchange_Names`` column.

    Args:
        id: key into the module-level ``ID_DICT`` (for example ``'cl'``).

    Returns:
        The matching rows of ``df_cot2`` restricted to the name, report-date and
        open-interest columns, followed by the long and then the short position
        columns for the M_Money, Other_Rept, Prod_Merc and NonRept categories.
    """
    wanted_cols = [
        # identifying columns
        'Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD', 'Open_Interest_All',
        # long positions
        'M_Money_Positions_Long_All', 'Other_Rept_Positions_Long_All',
        'Prod_Merc_Positions_Long_All', 'NonRept_Positions_Long_All',
        # short positions
        'M_Money_Positions_Short_All', 'Other_Rept_Positions_Short_All',
        'Prod_Merc_Positions_Short_All', 'NonRept_Positions_Short_All',
    ]
    is_market = df_cot2.Market_and_Exchange_Names == ID_DICT[id]['COT']
    return df_cot2.loc[is_market, wanted_cols]
# Bind the reduced-column frame for the 'cl' key to dfc.
dfc = get_cot2('cl')

In [37]:
# Column labels of the frame get_cot2 returns for the 'cl' key.
get_cot2('cl').columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'Prod_Merc_Positions_Long_All',
       'NonRept_Positions_Long_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'Prod_Merc_Positions_Short_All',
       'NonRept_Positions_Short_All'], dtype=object)

In [38]:
# Rows of dfc whose report date equals 2019-04-23.
dfc.loc[dfc['As_of_Date_In_Form_YYMMDD'] == '2019-04-23']

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,Prod_Merc_Positions_Long_All,NonRept_Positions_Long_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,Prod_Merc_Positions_Short_All,NonRept_Positions_Short_All
15202,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2019-04-23,2139213,352453,292217,448757,96400,22057,75254,507806,73290


In [39]:
# Rebind dfc to the 'cl' market's rows, this time keeping every df_cot2 column.
dfc = df_cot2.loc[df_cot2['Market_and_Exchange_Names'] == ID_DICT['cl']['COT']]

In [40]:
# Lift the row and column display limits while printing the first 2019-04-23 row of dfc.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dfc.loc[dfc['As_of_Date_In_Form_YYMMDD'] == '2019-04-23'].iloc[0])


Market_and_Exchange_Names         CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE
As_of_Date_In_Form_YYMMDD                                           2019-04-23 00:00:00
Open_Interest_All                                                               2139213
Prod_Merc_Positions_Long_All                                                     448757
Swap_Positions_Long_All                                                          158645
M_Money_Positions_Long_All                                                       352453
Other_Rept_Positions_Long_All                                                    292217
NonRept_Positions_Long_All                                                        96400
Tot_Rept_Positions_Long_All                                                     2042813
Prod_Merc_Positions_Short_All                                                    507806
Swap_Positions_Short_All                                                         670065
M_Money_Positions_Short_All     

In [41]:
from collections import OrderedDict
# Column-keyed dict of dfc, then the same mapping re-ordered by each key's
# position in dfc.columns.
dfc2 = dfc.to_dict()
dfc3 = OrderedDict(
    (column, dfc2[column])
    for column in sorted(dfc2, key=dfc.columns.get_loc)
)

In [42]:
# Position within dfc.columns of the third key of dfc2.
dfc.columns.get_loc(list(dfc2)[2])

2

In [43]:
# Round-trip dfc through a list of per-row dicts and show the rebuilt frame's column labels.
# 'records' is the full orient name; the abbreviated 'rows' spelling resolved to it in
# older pandas but is rejected by pandas >= 2.0.
pd.DataFrame(dfc.to_dict('records')).columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [44]:
# Column labels of dfc itself, for comparison with the labels of the frames rebuilt from its dicts.
dfc.columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [45]:
# Element-wise comparison of the column labels rebuilt from dfc2 against dfc's own labels.
pd.DataFrame(dfc2).columns.values == dfc.columns.values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

In [46]:
# 2019 CME open-interest rows for commodity indicator 'NG', in trade_date order,
# reduced to the trade_date and Open_Interest columns.
df_temp = (
    pd.read_csv(f'{cme_csv_save_folder}/cme_open_interest_2019.csv')
    .sort_values('trade_date')
    .loc[lambda frame: frame.Commodity_Indicator == 'NG', ['trade_date', 'Open_Interest']]
)
# Renumber the rows 0..n-1 before handing the frame to the plotting helper.
df_temp.index = list(range(df_temp.shape[0]))
ju.plotly_plot(df_temp, x_column='trade_date', bar_plot=False)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




In [47]:
# Show the lookup entry stored under the 'silver' key.
ID_DICT['silver']

{'OI': 'SILVER FUTURES',
 'COT': 'SILVER - COMMODITY EXCHANGE INC.',
 'ETF': 'SIZ99',
 'ETF_DIVISOR': 5000}

### END