In [1]:
import warnings
warnings.filterwarnings('ignore')

___
# Merge csv files for ETF shares, CME Open Interest, and CFTC COT reports.
___

### Usage:
Run all the cells in this workbook after you have run all of the "build" notebooks in this project

### Main Graphs:
The graphs that follow the header ```COT vs Market Graphs``` show the graph of the lastest COT (new format) for the main physical commodities vs a graph of the settlement prices for those commodities, or for an ETF that closely tracts those commodities (think Comex GC vs the ETF GLD).


In [53]:
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import sys
import os
abs_folders = [os.path.abspath(d) for d in ['./','../']]
for af in abs_folders:
    if  not af in sys.path:
        sys.path.append(af)
from cme_open_interest import db_info

import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)

import zipfile
import urllib.request
from PIL import Image
import jupyter_utilities as ju
import importlib
import pandasql as psql
import traceback
import pdb

# Make important folders
TEMP_FOLDER = './temp_folder'
try:
    os.mkdir(TEMP_FOLDER)
except:
    pass
SAVE_IMAGE_FOLDER = f'{TEMP_FOLDER}/gold'
try:
    os.mkdir(SAVE_IMAGE_FOLDER)
except:
    pass

def to_int(s,print_full_exception=False):
    try:
        return int(float(str(s)))
    except Exception as e:
        print(f'to_int exception on value:{s}')
        if print_full_exception:
            traceback.print_exc()
        return None

pd.set_option('display.max_colwidth',1000)
if os.path.abspath('../')  not in sys.path:
    if '.' not in sys.path:
        sys.path.append(os.path.abspath('../'))
import barchart_api as bcapi
import importlib
import json

cme_csv_save_folder = './cme_oi_data'
cot_data_path = './cot_net_new_history.csv'
etf_data_path = './etf_cap_hist.csv'

In [3]:
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'

pga = db_info.get_db_info()
print(f"futtab max date: {pga.get_sql(f'select max(settle_date) from {futtab}')}")
print(f"opttab max date: {pga.get_sql(f'select max(settle_date) from {opttab}')}")


  sec_db
futtab max date:         max
0  20200521
opttab max date:         max
0  20200521


### Define commodity and etf identifiers in the csv files

In [4]:
OI_ID_GOLD = 'GOLD FUTURES'
OI_ID_SILVER = 'SILVER FUTURES'
OI_ID_CL = 'CRUDE OIL LIGHT SWEET FUTURES'
OI_ID_NG = 'NATURAL GAS FUTURES'
OI_ID_10Y = '10Y NOTE FUTURE'
OI_ID_SPY = 'E-MINI S&P 500 FUTURE'
OI_ID_SOYB = 'SOYBEAN FUTURE'
OI_ID_SOYO = 'SOYBEAN OIL FUTURE'
OI_ID_CORN = 'CORN FUTURE'
OI_ID_WHEAT = 'CHICAGO SRW WHEAT FUTURE'
OI_ID_COTTON = 'COTTON FUTURES'
OI_ID_EURO = 'EURO FX FUTURE'
OI_ID_ED = 'EURODOLLAR FUTURE'
OI_ID_HG = 'HIGH GRADE COPPER FUTURES'
OI_ID_PL = 'PLATINUM FUTURES'
OI_ID_SUGAR = 'SUGAR 11 FUTURES'
OI_ID_COCOA = 'COCOA FUTURES'

COT_ID_GOLD= 'GOLD - COMMODITY EXCHANGE INC.'
COT_ID_SILVER= 'SILVER - COMMODITY EXCHANGE INC.'
# COT_ID_CL = 'CRUDE OIL, LIGHT SWEET'
COT_ID_CL = 'CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE'
COT_ID_NG = 'NATURAL GAS - NEW YORK MERCANTILE EXCHANGE'
COT_ID_10Y = '10-YEAR U.S. TREASURY NOTES - CHICAGO BOARD OF TRADE'
COT_ID_SPY = 'E-MINI S&P 500 STOCK INDEX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_SOYB = 'SOYBEANS - CHICAGO BOARD OF TRADE'
COT_ID_SOYO = 'SOYBEAN OIL - CHICAGO BOARD OF TRADE'
# COT_ID_WHEAT = 'WHEAT - CHICAGO BOARD OF TRADE'
COT_ID_COTTON = 'COTTON NO. 2 - ICE FUTURES U.S.'
COT_ID_WHEAT = 'WHEAT-SRW - CHICAGO BOARD OF TRADE'
COT_ID_CORN = 'CORN - CHICAGO BOARD OF TRADE'
COT_ID_EURO = 'EURO FX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_ED = '3-MONTH EURODOLLARS - CHICAGO MERCANTILE EXCHANGE'
COT_ID_HG = 'COPPER-GRADE #1 - COMMODITY EXCHANGE INC.'
COT_ID_PL = 'PLATINUM - NEW YORK MERCANTILE EXCHANGE'
COT_ID_SUGAR = 'SUGAR NO. 11 - ICE FUTURES U.S.'
COT_ID_COCOA = 'COCOA - ICE FUTURES U.S.'

ETF_ID_GOLD = 'GCZ99'
ETF_ID_SILVER = 'SIZ99'
ETF_ID_CL = 'CLZ99'
ETF_ID_NG = 'NGZ99'
ETF_ID_10Y = 'AGG'
ETF_ID_SPY = 'SPY'
ETF_ID_COTTON = 'BAL'
ETF_ID_SOYB = 'ZSZ99'#'ZSY00'
ETF_ID_SOYO = 'ZLZ99'#'ZLY00'
ETF_ID_WHEAT = 'ZWZ99'#'ZWY00'#'WEAT'
ETF_ID_CORN = 'ZCZ99'#'CORN'
ETF_ID_EURO = 'FXE'
ETF_ID_ED = 'BSV'
ETF_ID_HG = 'CPER'
ETF_ID_PL = 'PPLT'
ETF_ID_SUGAR = 'SGG'
ETF_ID_COCOA = 'NIB'

ETF_SHARES_DIVISOR_GOLD = 1000
ETF_SHARES_DIVISOR_SILVER = 5000
ETF_SHARES_DIVISOR_10Y = 1000
ETF_SHARES_DIVISOR_SPY = 500
ETF_SHARES_DIVISOR_COTTON = 50000
ETF_SHARES_DIVISOR_SOYB = 5000
ETF_SHARES_DIVISOR_SOYO = 1
ETF_SHARES_DIVISOR_WHEAT = 5000
ETF_SHARES_DIVISOR_CORN = 5000
ETF_SHARES_DIVISOR_EURO = 1000
ETF_SHARES_DIVISOR_CL = 4000
ETF_SHARES_DIVISOR_NG = 1250
ETF_SHARES_DIVISOR_ED = 1000000
ETF_SHARES_DIVISOR_HG = 2500
ETF_SHARES_DIVISOR_PL = 500
ETF_SHARES_DIVISOR_SUGAR = int((112000/100)/3)
ETF_SHARES_DIVISOR_COCOA = 100


ID_DICT = {
    'gold':{'OI':OI_ID_GOLD,'COT':COT_ID_GOLD,'ETF':ETF_ID_GOLD,'ETF_DIVISOR':ETF_SHARES_DIVISOR_GOLD},
    'silver':{'OI':OI_ID_SILVER,'COT':COT_ID_SILVER,'ETF':ETF_ID_SILVER,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SILVER},
#     '10Y':{'OI':OI_ID_10Y,'COT':COT_ID_10Y,'ETF':ETF_ID_10Y,'ETF_DIVISOR':ETF_SHARES_DIVISOR_10Y},
#     'spy':{'OI':OI_ID_SPY,'COT':COT_ID_SPY,'ETF':ETF_ID_SPY,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SPY},
    'cotton':{'OI':OI_ID_COTTON,'COT':COT_ID_COTTON,'ETF':ETF_ID_COTTON,'ETF_DIVISOR':ETF_SHARES_DIVISOR_COTTON},
    'soyb':{'OI':OI_ID_SOYB,'COT':COT_ID_SOYB,'ETF':ETF_ID_SOYB,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SOYB},
    'soyo':{'OI':OI_ID_SOYO,'COT':COT_ID_SOYO,'ETF':ETF_ID_SOYO,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SOYO},
    'wheat':{'OI':OI_ID_WHEAT,'COT':COT_ID_WHEAT,'ETF':ETF_ID_WHEAT,'ETF_DIVISOR':ETF_SHARES_DIVISOR_WHEAT},
    'corn':{'OI':OI_ID_CORN,'COT':COT_ID_CORN,'ETF':ETF_ID_CORN,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CORN},
#     'euro':{'OI':OI_ID_EURO,'COT':COT_ID_EURO,'ETF':ETF_ID_EURO,'ETF_DIVISOR':ETF_SHARES_DIVISOR_EURO},
    'cl':{'OI':OI_ID_CL,'COT':COT_ID_CL,'ETF':ETF_ID_CL,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CL},
    'ng':{'OI':OI_ID_NG,'COT':COT_ID_NG,'ETF':ETF_ID_NG,'ETF_DIVISOR':ETF_SHARES_DIVISOR_NG},
#     'ed':{'OI':OI_ID_ED,'COT':COT_ID_ED,'ETF':ETF_ID_ED,'ETF_DIVISOR':ETF_SHARES_DIVISOR_ED},
    'hg':{'OI':OI_ID_HG,'COT':COT_ID_HG,'ETF':ETF_ID_HG,'ETF_DIVISOR':ETF_SHARES_DIVISOR_HG},
    'pl':{'OI':OI_ID_PL,'COT':COT_ID_PL,'ETF':ETF_ID_PL,'ETF_DIVISOR':ETF_SHARES_DIVISOR_PL},
    'sugar':{'OI':OI_ID_SUGAR,'COT':COT_ID_SUGAR,'ETF':ETF_ID_SUGAR,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SUGAR},
    'cocoa':{'OI':OI_ID_COCOA,'COT':COT_ID_COCOA,'ETF':ETF_ID_COCOA,'ETF_DIVISOR':ETF_SHARES_DIVISOR_COCOA},
}


YEAR_OFFSET = 0 if datetime.datetime.now() > datetime.datetime(2020,1,10) else 1

In [5]:
ID_DICT['wheat']['ETF']

'ZWZ99'

___
## Define help access routines
___

In [6]:
# COT helpers
def df_cot_by_name(dict_id='cl',df_cot=None):
    dfc = df_cot2 if df_cot is None else df_cot
    cot_id = ID_DICT[dict_id]['COT']
    return dfc[dfc.Market_and_Exchange_Names==cot_id]


___
### Get cme open interest, COT and ETF data from csv files
___

In [7]:
import traceback
df_oi = None
last_year = int(datetime.datetime.now().year)
years = np.linspace(2013,last_year-YEAR_OFFSET,last_year-2013+1,dtype=int)
df_oi = None
for y in years:
    df_temp = pd.read_csv(f'{cme_csv_save_folder}/cme_open_interest_{y}.csv')
    df_temp = df_temp[~df_temp.Open_Interest.isnull()]
    if df_oi is None:
        df_oi = df_temp.copy()
    else:
        df_oi = df_oi.append(df_temp,ignore_index=True)
        df_oi.index = list(range(len(df_oi)))
df_oi = df_oi[~df_oi.Total_Volume.isnull()]
df_oi.ExPit_Volume = df_oi.ExPit_Volume.fillna(0)
df_oi = df_oi[~df_oi.Open_Interest.astype(str).str.contains('T')]
df_oi.Open_Interest = df_oi.Open_Interest.apply(to_int)
df_oi.Total_Volume = df_oi.Total_Volume.apply(to_int)
print(f'oi length:{len(df_oi)}')
df_etf = pd.read_csv(etf_data_path)
df_etf['trade_date'] = df_etf.date.apply(ju.str_to_yyyymmdd)
print(f'etf length:{len(df_etf)}')
df_cot2 = pd.read_csv(cot_data_path)
df_cot2.As_of_Date_In_Form_YYMMDD = df_cot2.As_of_Date_In_Form_YYMMDD.apply(ju.str_to_date)
df_cot2.Market_and_Exchange_Names = df_cot2.Market_and_Exchange_Names.str.strip() 
print(f'cot length:{len(df_cot2)}')


oi length:1603370
etf length:23064
cot length:57215


### For some of the ETF's, get the data from yahoo, and ignore the shares data

In [8]:
def fetch_history(symbol,dt_beg,dt_end):
    df = pdr.DataReader(symbol, 'yahoo', dt_beg, dt_end)
    df['date'] = df.index
    df.date = df.date.apply(lambda d: str(d)[0:4] + "-" + str(d)[5:7] + "-" + str(d)[8:10])
    df['trade_date'] = df.date.apply(lambda d: int(str(d)[0:4] + str(d)[5:7] + str(d)[8:10]))
    df = df.sort_values('date')
    df.index = list(range(len(df)))
    # make adj close the close
    df['nav'] = df['Adj Close']
    df['symbol'] = symbol
    df['shares'] = 0
    df = df[['symbol','date','nav','shares','trade_date']]
    return df

    

In [9]:
df_soyb = fetch_history(ETF_ID_SUGAR,datetime.datetime(2015,1,1),datetime.datetime(2020,12,31))
# print(df_etf.tail())
print(df_soyb.tail())
print(df_soyb.head())
# df_etf = df_etf.append(df_soyb,ignore_index=True)

    symbol        date        nav  shares  trade_date
581    SGG  2020-05-18  34.342999       0    20200518
582    SGG  2020-05-19  34.735001       0    20200519
583    SGG  2020-05-20  36.321999       0    20200520
584    SGG  2020-05-21  35.125999       0    20200521
585    SGG  2020-05-22  35.169998       0    20200522
  symbol        date        nav  shares  trade_date
0    SGG  2018-01-25  49.790001       0    20180125
1    SGG  2018-01-26  49.790001       0    20180126
2    SGG  2018-01-29  49.790001       0    20180129
3    SGG  2018-01-30  49.790001       0    20180130
4    SGG  2018-01-31  49.630001       0    20180131


In [10]:
def get_nasdaq_commod(commod='ZL'):
    df_zl = pd.read_csv(f'https://www.nasdaq.com/api/v1/historical/{commod}/commodities/2015-11-15/2022-12-15')
#     df_soyo = pd.DataFrame({'date':df_zl.as_matrix()[:,0],'nav':df_zl.as_matrix()[:,1],'shares':0})
    df_soyo = pd.DataFrame({'date':df_zl.values[:,0],'nav':df_zl.values[:,1],'shares':0})
    df_soyo['symbol'] = 'SOYO'
    df_soyo['date'] = df_soyo.date.apply(lambda s: s[6:10] + '-' + s[0:2] + '-' + s[3:5])
    df_soyo  = df_soyo.sort_values('date')
    df_soyo['trade_date'] = df_soyo.date.apply(lambda s: int(s.replace('-','')))
    return df_soyo[['symbol','date','nav','shares','trade_date']]

In [11]:
ETF_ID_SOYO,ETF_ID_SOYO[-3:]

('ZLZ99', 'Z99')

In [12]:
def get_barchart_commod(commod='ZL',month='Z',year=99):
    sql = f"""
    with 
    f1 as (
        select * from {futtab} where symbol = '{commod}{month}{year}'
    )
    select * from f1
    """
    df = pga.get_sql(sql)
    df['date'] = df.settle_date.apply(lambda v: f"{str(v)[0:4]}-{str(v)[4:6]}-{str(v)[6:8]}")
    df['nav'] = df.close
    df['trade_date'] = df.settle_date
    df['shares'] = 0

    df = df[['symbol','date','nav','shares','trade_date']]
    return df    

In [13]:
df_wheat = get_barchart_commod('ZC')
df_wheat.tail()

Unnamed: 0,symbol,date,nav,shares,trade_date
2420,ZCZ99,2020-05-15,312.75,0,20200515
2421,ZCZ99,2020-05-18,318.25,0,20200518
2422,ZCZ99,2020-05-19,318.75,0,20200519
2423,ZCZ99,2020-05-20,317.0,0,20200520
2424,ZCZ99,2020-05-21,315.25,0,20200521


In [14]:
# get_nasdaq_commod(ETF_ID_SOYO[:2])

### Find identifier strings for specific Open Interest and COT rows in their respective DataFrames
1. Enter values for oi_key_word, cot_key_word and etf_key_word below
2. Choose the product/market_and_exchange_name/symbol that has the highest open_interest or volume

In [15]:
import pdb
def create_merged_df(commod_to_use):
    global df_oi,df_etf,df_cot2
    # step 1 set up ID's
    OI_ID = ID_DICT[commod_to_use]['OI']
    COT_ID = ID_DICT[commod_to_use]['COT']
    ETF_ID = ID_DICT[commod_to_use]['ETF']
    ETF_DIVISOR = ID_DICT[commod_to_use]['ETF_DIVISOR']
    #Step 2: make sure ID's produce only one contract
    oi_key_word = OI_ID.lower()
    cot_key_word = COT_ID.lower()
    etf_key_word = ETF_ID.lower()

    l = (list(filter(lambda s: oi_key_word == str(s).lower(),df_oi.Product_Description.unique())))
    df_oi_sub = df_oi[df_oi.Product_Description.isin(l)][['Product_Description','Open_Interest']]
    df_oi_gb = df_oi_sub.groupby('Product_Description',as_index=False).sum()

    l = (list(filter(lambda s: cot_key_word == str(s).lower(),df_cot2.Market_and_Exchange_Names.unique())))
    df_cot_single = df_cot2[df_cot2.Market_and_Exchange_Names==COT_ID]
    df_cot_sub = df_cot2[df_cot2.Market_and_Exchange_Names.isin(l)][['Market_and_Exchange_Names','Open_Interest_All']]
    df_cot_gb = df_cot_sub.groupby('Market_and_Exchange_Names',as_index=False).sum()

    l = (list(filter(lambda s: etf_key_word == str(s).lower(),df_etf.symbol.unique())))
    dtmin = str(df_cot_single.As_of_Date_In_Form_YYMMDD.min())[0:10]
    dtmax = str(datetime.datetime.now())[0:10]
    #pdb.set_trace()
    if ETF_ID[-3:] == 'Y00':
        print(f'using get_nasdaq_commod for {ETF_ID}')
        df_etf_single = get_nasdaq_commod(ETF_ID[:2])
    elif ETF_ID[-3:] == 'Z99':
        print(f'using get_barchart_commod for {ETF_ID}')
        df_etf_single = get_barchart_commod(ETF_ID[:2])
    else:
        df_etf_single = fetch_history(ETF_ID,dtmin,dtmax)
    df_etf_gb = df_etf_single.groupby('symbol',as_index=False).sum()
    

    if len(df_oi_gb) + len(df_cot_gb) + len(df_etf_gb) == 3:
        print(f"all ID's for commodity: {commod_to_use} are OK")
    else:
        print(f"!!!!! ALL ID's FOR COMMODITY: {commod_to_use} ARE NOT OK!!!!!!")
        print(f'oi is OK: {len(df_oi_gb)==1}')
        print(f'cot is OK: {len(df_cot_gb)==1}')
        print(f'etf is OK: {len(df_etf_gb)==1}')
        raise ValueError(f'ambiguous ID name {OI_ID}')
    
    
    # Step 3: merge oi, cot and etf stuff
    df_oi_single = df_oi[df_oi.Product_Description == OI_ID][['trade_date','Open_Interest','Total_Volume']]
    df_oi_single.Open_Interest = df_oi_single.Open_Interest.apply(to_int)
    df_oi_single.Total_Volume = df_oi_single.Total_Volume.astype(float).astype(int)
    df_commod_net = df_cot_single.copy()

    df_commod_net['cot_yyyymmdd'] = df_commod_net.As_of_Date_In_Form_YYMMDD.apply(ju.str_to_yyyymmdd)
    df_commod_net = df_commod_net.sort_values('cot_yyyymmdd')
    df_commod_net.index = list(range(len(df_commod_net)))

    last_date = ju.str_to_date(str(df_commod_net.iloc[-1].cot_yyyymmdd),sep='') + datetime.timedelta(7)
    last_date_yyyymmdd = ju.str_to_yyyymmdd(last_date)
    df_commod_net['next_cot_yyyymmdd'] = list(df_commod_net[1:].cot_yyyymmdd) + [last_date_yyyymmdd]

    df_etf_oi = df_etf_single[['trade_date','nav','shares']].merge(df_oi_single,how='inner',on='trade_date')
    df_etf_oi['nav_diff'] = df_etf_oi.nav.pct_change()
    df_etf_oi['share_diff'] = df_etf_oi.shares.pct_change()
    df_etf_oi['oi_diff'] = df_etf_oi.Open_Interest.pct_change()
    
    df_etf_oi = df_etf_single.copy()
    q = f"select * from df_etf_oi inner join df_commod_net on df_etf_oi.trade_date >= df_commod_net.cot_yyyymmdd and df_etf_oi.trade_date < df_commod_net.next_cot_yyyymmdd"
    df_etf_oi_cot =  psql.sqldf(q, locals())
#     etf_oi_cols = list(df_etf_oi.columns.values)
#     cot_cols = list(cols_to_change.values()) + ['cot_yyyymmdd','next_cot_yyyymmdd']
#     etf_oi_cot_cols = etf_oi_cols + cot_cols
#     df_etf_oi_cot = df_etf_oi_cot[etf_oi_cot_cols]

    # step 4: create final,
    cot_cols = ['prod_net','monman_net','swap_net','other_net','nonrep_net','prod_ratio','monman_ratio','swap_ratio','other_ratio']
    df_final = df_etf_oi_cot[['trade_date','nav']+cot_cols][-1000:]
    df_final = df_final.loc[:,~df_final.columns.duplicated()]
    df_final = df_final.sort_values('trade_date')
    return df_final


In [16]:
create_merged_df('silver').tail()


using get_barchart_commod for SIZ99
all ID's for commodity: silver are OK


2020-05-22 11:34:16,898 - numexpr.utils - INFO - NumExpr defaulting to 4 threads.


Unnamed: 0,trade_date,nav,prod_net,monman_net,swap_net,other_net,nonrep_net,prod_ratio,monman_ratio,swap_ratio,other_ratio
1393,20200512,154.345,-38490.0,13943.0,-2191.0,11830.0,14908.0,0.206752,1.918511,0.955396,4.109884
1394,20200513,155.532,-38490.0,13943.0,-2191.0,11830.0,14908.0,0.206752,1.918511,0.955396,4.109884
1395,20200514,158.494,-38490.0,13943.0,-2191.0,11830.0,14908.0,0.206752,1.918511,0.955396,4.109884
1396,20200515,166.019,-38490.0,13943.0,-2191.0,11830.0,14908.0,0.206752,1.918511,0.955396,4.109884
1397,20200518,169.233,-38490.0,13943.0,-2191.0,11830.0,14908.0,0.206752,1.918511,0.955396,4.109884


### Create multi plot of all data commodities

In [17]:
import traceback
dict_df = {}
last_n_days = 1000
for k in ID_DICT.keys():
    print(f'processing {k}')
    try:
        df = create_merged_df(k)
        dict_df[k] = df.iloc[-last_n_days:]
    except Exception as e:
        print(f'EXCEPTION: {str(e)}')
#         print(traceback.print_exc())

processing gold
using get_barchart_commod for GCZ99
all ID's for commodity: gold are OK
processing silver
using get_barchart_commod for SIZ99
all ID's for commodity: silver are OK
processing cotton
all ID's for commodity: cotton are OK
processing soyb
using get_barchart_commod for ZSZ99
all ID's for commodity: soyb are OK
processing soyo
using get_barchart_commod for ZLZ99
all ID's for commodity: soyo are OK
processing wheat
using get_barchart_commod for ZWZ99
all ID's for commodity: wheat are OK
processing corn
using get_barchart_commod for ZCZ99
all ID's for commodity: corn are OK
processing cl
using get_barchart_commod for CLZ99
all ID's for commodity: cl are OK
processing ng
using get_barchart_commod for NGZ99
all ID's for commodity: ng are OK
processing hg
all ID's for commodity: hg are OK
processing pl
all ID's for commodity: pl are OK
processing sugar
all ID's for commodity: sugar are OK
processing cocoa
all ID's for commodity: cocoa are OK


___
### Plot each df in dict_df using plotly
___

In [18]:
ddd = dict_df['cl']
net_cols = [c for c in ddd.columns.values if '_net' in c]
f = ju.plotly_plot(ddd[['trade_date','nav']+net_cols],x_column='trade_date',bar_plot=False,yaxis2_cols=['nav'])
iplot(f)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




In [19]:
f = ju.plotly_plot(ddd[['trade_date','nav','monman_net','other_net']],x_column='trade_date',bar_plot=False,yaxis2_cols=['nav'])
iplot(f)

## COT vs Market Graphs
Run the cell below to create a graph for each commodity which displays both the COT history for that commodity and the price history for that commodity.

In [20]:
dict_df['gold'].tail(15)
dict_df.keys()

dict_keys(['gold', 'silver', 'cotton', 'soyb', 'soyo', 'wheat', 'corn', 'cl', 'ng', 'hg', 'pl', 'sugar', 'cocoa'])

In [21]:
y_left_label = 'y nav'
y_right_label = 'monman_net'

df_all_in = None
for k in dict_df.keys():
    df_in = dict_df[k]
    if len(df_in)>1:
        df_in_with_commod = df_in.copy()
        df_in_with_commod['symbol'] = k
        if df_all_in is None:
            df_all_in = df_in_with_commod.copy()
        else:
            df_all_in = df_all_in.append(df_in_with_commod,ignore_index=True)
    last_date = df_in.trade_date.max()
    xc = 'trade_date'
    plot_title = f'{k} last trade date {last_date}'
    try:
        fig = ju.plotly_pandas(df_in[['trade_date', 'nav', y_right_label]],x_column=xc,plot_title=plot_title,y_left_label=y_left_label,y_right_label=y_right_label)
        iplot(fig)
    except Exception as e:
        print(f'{k} {str(e)}')
df_all_in.to_csv(f'{TEMP_FOLDER}/df_all_in.csv',index=False)


In [22]:
# this is the old "non plotly" plotting routine
# ju.multi_df_plot(dict_df=dict_df,x_column='trade_date',num_of_x_ticks=40,save_file_prefix='cot_nav_plot',save_image_folder='./temp_folder/saved_images')    


___
### Try various strategies based on above charts
___

In [23]:
dict_df['cl'].columns.values

array(['trade_date', 'nav', 'prod_net', 'monman_net', 'swap_net',
       'other_net', 'nonrep_net', 'prod_ratio', 'monman_ratio',
       'swap_ratio', 'other_ratio'], dtype=object)

In [24]:
field_to_chart = 'nav'
field_to_chart2 = 'monman_net'
for sym in dict_df.keys():
    df_all_in = dict_df[sym].copy()    
    df_all_in[f'{field_to_chart2}_prev'] = df_all_in[field_to_chart2].shift(1)
    df_all_in['is_transition'] = df_all_in.apply(lambda r: 1 if r[field_to_chart2] / r[f'{field_to_chart2}_prev'] < 0 else 0,axis=1)
    df_all_in['trans_sign'] = df_all_in.apply(lambda r: 1 if r[field_to_chart2] - r[f'{field_to_chart2}_prev'] > 0 else -1,axis=1)
    df_all_in['transition'] = df_all_in.is_transition * df_all_in.trans_sign
    df_all_in2 = df_all_in[['trade_date',field_to_chart,'transition']]
    fig = ju.plotly_pandas(df_all_in2[['trade_date',field_to_chart,'transition']],x_column='trade_date',plot_title=sym)
    iplot(fig)
# len(df_all_in[df_all_in.tran_count==1])/len(df_all_in)

___
### The cells below help you find commodities in df_cot2 and df_oi
___

In [25]:
df_oi[df_oi.Product_Description=='SUGAR 11 FUTURES'].tail()

Unnamed: 0,CME__Globex__Volume,Commodity_Indicator,Description,ExPit_Volume,Exchange_Name,Future_Option_Indicator,MTD_ADV,None,OTC_Volume,Open_Interest,Pit_Volume,Product_Description,Total_Volume,trade_date
1166509,0.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,0.0,,0,0,0,SUGAR 11 FUTURES,0,20180702
1385247,7.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,3.5,,0,3,0,SUGAR 11 FUTURES,7,20190604
1386177,2.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,3.0,,0,1,0,SUGAR 11 FUTURES,2,20190605
1387114,1.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,2.5,,0,0,0,SUGAR 11 FUTURES,1,20190606
1388082,0.0,YO,AG PRODUCTS,0,NYMEX(STATS),F,2.0,,0,0,0,SUGAR 11 FUTURES,0,20190607


In [26]:
list(filter(lambda s: str(s)!='nan' and 'COCOA' in s and 'FUTURE' in s,list(set(df_oi.Product_Description))))

['COCOA FUTURES']

In [27]:
list(filter(lambda s: str(s)!='nan' and 'COCOA' in s ,list(set(df_cot2.Market_and_Exchange_Names))))

['COCOA - ICE FUTURES U.S.']

In [28]:
print(df_cot2[df_cot2.Market_and_Exchange_Names=='COTTON NO. 2 - NEW YORK COTTON EXCHANGE'].Open_Interest_All.sum())
print(df_cot2[df_cot2.Market_and_Exchange_Names=='COTTON NO. 2 - ICE FUTURES U.S.'].Open_Interest_All.sum())


0
63913208


In [29]:
list(filter(lambda s: 'Comm' in s,df_cot2.columns.values))

[]

In [30]:
df_cot2_cl = df_cot2[df_cot2.Market_and_Exchange_Names=='CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE']
df_cot2_cl = df_cot2_cl.sort_values('As_of_Date_In_Form_YYMMDD')
df_cot2_cl.tail()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All,Prod_Merc_Positions_Short_All,...,swap_net,swap_ratio,monman_net,monman_ratio,other_net,other_ratio,nonrep_net,nonrep_ratio,totrep_net,totrep_ratio
9410,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-04-14,2353955,488323,138641,295870,404604,129134,2224821,519210,...,-523942.0,0.209243,195355.0,2.943541,315314.0,4.531347,44160.0,1.519688,-44160.0,0.980538
9411,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-04-21,2276638,468562,146426,324667,411581,100919,2175719,517090,...,-544054.0,0.212064,247478.0,4.20613,339702.0,5.726026,5402.0,1.056555,-5402.0,0.997523
9412,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-04-28,2261202,463745,133110,384326,353452,102192,2159010,539452,...,-514578.0,0.205516,318366.0,5.826653,271022.0,4.287905,897.0,1.008855,-897.0,0.999585
9413,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-05-05,2243871,464901,136434,392191,330721,95769,2148102,557148,...,-444651.0,0.234792,328601.0,6.167495,202011.0,2.569505,6286.0,1.070248,-6286.0,0.997082
9414,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2020-05-12,2248020,491543,134875,405071,304486,102997,2145023,600425,...,-449089.0,0.230965,351685.0,7.587589,189334.0,2.644209,16952.0,1.197013,-16952.0,0.992159


## END

In [31]:
comod = 'hg'
fig = ju.plotly_pandas(dict_df[comod][['trade_date', 'monman_net', 'monman_ratio']],x_column='trade_date',plot_title=comod)
iplot(fig)


In [32]:
def get_cot2(id):
    basic_cols = ['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD','Open_Interest_All']
    long_cols = ['M_Money_Positions_Long_All','Other_Rept_Positions_Long_All','Prod_Merc_Positions_Long_All',
                'NonRept_Positions_Long_All']
    short_cols = ['M_Money_Positions_Short_All','Other_Rept_Positions_Short_All','Prod_Merc_Positions_Short_All',
                'NonRept_Positions_Short_All']

    df_ret = df_cot2[df_cot2.Market_and_Exchange_Names==ID_DICT[id]['COT']]
    df_ret = df_ret[basic_cols + long_cols + short_cols]
    return df_ret
dfc = get_cot2('cl')

In [33]:
get_cot2('cl').columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'Prod_Merc_Positions_Long_All',
       'NonRept_Positions_Long_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'Prod_Merc_Positions_Short_All',
       'NonRept_Positions_Short_All'], dtype=object)

In [34]:
dfc[dfc.As_of_Date_In_Form_YYMMDD=='2019-04-23']

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,Prod_Merc_Positions_Long_All,NonRept_Positions_Long_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,Prod_Merc_Positions_Short_All,NonRept_Positions_Short_All
9360,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE",2019-04-23,2139213,352453,292217,448757,96400,22057,75254,507806,73290


In [35]:
dfc = df_cot2[df_cot2.Market_and_Exchange_Names==ID_DICT['cl']['COT']]

In [36]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(dfc[dfc.As_of_Date_In_Form_YYMMDD=='2019-04-23'].iloc[0])


Market_and_Exchange_Names         CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE
As_of_Date_In_Form_YYMMDD                                           2019-04-23 00:00:00
Open_Interest_All                                                               2139213
Prod_Merc_Positions_Long_All                                                     448757
Swap_Positions_Long_All                                                          158645
M_Money_Positions_Long_All                                                       352453
Other_Rept_Positions_Long_All                                                    292217
NonRept_Positions_Long_All                                                        96400
Tot_Rept_Positions_Long_All                                                     2042813
Prod_Merc_Positions_Short_All                                                    507806
Swap_Positions_Short_All                                                         670065
M_Money_Positions_Short_All     

In [37]:
from collections import OrderedDict
dfc2 = dfc.to_dict()#('rows')
dfc3 = OrderedDict(sorted(dfc2.items(), key=lambda x: dfc.columns.get_loc(x[0])))

In [38]:
dfc.columns.get_loc(list(dfc2.items())[2][0])

2

In [39]:
pd.DataFrame(dfc.to_dict('rows')).columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [40]:
dfc.columns.values#.get_loc(key='Open_Interest_All')

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [41]:
pd.DataFrame(dfc2).columns.values == dfc.columns.values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

In [42]:
df_temp = pd.read_csv(f'{cme_csv_save_folder}/cme_open_interest_2019.csv')#[['trade_date','Open_Interest']]
df_temp = df_temp.sort_values('trade_date')
df_temp = df_temp[df_temp.Commodity_Indicator=='NG'][['trade_date','Open_Interest']]
df_temp.index = list(range(len(df_temp)))
ju.plotly_plot(df_temp,x_column='trade_date',bar_plot=False)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




In [43]:
ID_DICT['silver']

{'OI': 'SILVER FUTURES',
 'COT': 'SILVER - COMMODITY EXCHANGE INC.',
 'ETF': 'SIZ99',
 'ETF_DIVISOR': 5000}

In [44]:
# importlib.reload(ju)

### Get open interest dataframe, and silver close data from barchartacs:


In [45]:
start_year = 2016

df_oi_silver = df_oi[df_oi.Product_Description==ID_DICT['silver']['OI']]
df_oi_silver[df_oi_silver.trade_date>20200301]
df_si = pga.get_sql(f"select settle_date trade_date,close from {futtab} where symbol='SIZ99'")
df_si.close = df_si.close/10
df_oi_silver = df_oi_silver.merge(df_si,on='trade_date',how='inner')

df_ois2 = df_oi_silver[df_oi_silver.trade_date>=start_year*100*100+1*100+1]
df_ois2.tail(20)

Unnamed: 0,CME__Globex__Volume,Commodity_Indicator,Description,ExPit_Volume,Exchange_Name,Future_Option_Indicator,MTD_ADV,None,OTC_Volume,Open_Interest,Pit_Volume,Product_Description,Total_Volume,trade_date,close
1645,58414.0,SI,METALS,0,COMEX(STATS),F,65752.9,,1592,143892,0,SILVER FUTURES,60006,20200424,15.222
1646,51818.0,SI,METALS,0,COMEX(STATS),F,65002.3,,424,140549,0,SILVER FUTURES,52242,20200427,15.1841
1647,62199.0,SI,METALS,0,COMEX(STATS),F,64883.7,,549,138521,0,SILVER FUTURES,62748,20200428,15.1402
1648,48598.0,SI,METALS,0,COMEX(STATS),F,64146.0,,1532,139073,0,SILVER FUTURES,50130,20200429,15.2924
1649,71668.0,SI,METALS,0,COMEX(STATS),F,64655.4,,3175,134866,0,SILVER FUTURES,74843,20200430,14.9456
1650,36502.0,SI,METALS,0,COMEX(STATS),F,36906.0,,404,132133,0,SILVER FUTURES,36906,20200501,14.9439
1651,40870.0,SI,METALS,0,COMEX(STATS),F,39320.5,,865,131906,0,SILVER FUTURES,41735,20200504,14.758
1652,35163.0,SI,METALS,0,COMEX(STATS),F,38019.7,,255,132629,0,SILVER FUTURES,35418,20200505,14.9338
1653,40658.0,SI,METALS,0,COMEX(STATS),F,38785.5,,425,132845,0,SILVER FUTURES,41083,20200506,14.8321
1654,52884.0,SI,METALS,0,COMEX(STATS),F,41874.6,,1347,137082,0,SILVER FUTURES,54231,20200507,15.3205


### Plot Silver Open Interest vs Silver Cash Close

In [46]:
f = ju.plotly_plot(
    df_in=df_ois2[['trade_date','Open_Interest','close']],
    x_column='trade_date',yaxis2_cols=['close'],
    y_left_label='Open Interest',
    y_right_label='Silver Cash Close',
    plot_title = f"Silver Open Interest vs Cash close, from {start_year} to present"
)
f = ju.plotly_shaded_rectangles([(20161125,20170308),(20180313,20180413),(20200218,20200406)],f)
iplot(f)


plotly.graph_objs.Margin is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.Margin




### Use Futures Cash prices to compare the price changes in Gold and Silver to the Gold/Silver Ratio

In [47]:
def _gtcommod(c,divisor=10):
    df = get_barchart_commod(c)
    df = df[['trade_date','nav']]
    df = df.rename(columns={'nav':c})
    df[c] = df[c]/divisor
    return df
df_si = _gtcommod('SI')
df_gc = _gtcommod('GC')
df_both = df_gc.copy()
df_both = df_both.merge(df_si,on='trade_date',how='inner')
df_both['ratio'] = df_both.GC / df_both.SI
f1 = ju.plotly_plot(df_in=df_both[['trade_date','ratio']],x_column='trade_date',
                    figsize=(800,500))
f2 = ju.plotly_plot(df_in=df_both[['trade_date','GC','SI']],x_column='trade_date',
                    yaxis2_cols=['SI'],y_left_label='GC Cash Price',y_right_label='SI Cash Price',
                    figsize=(800,700))

f1.data[0].yaxis = 'y3'
f4_traces = [f1.data[0],f2.data[0],f2.data[1]]
f2.update_layout(yaxis={'domain':(.57,1)})
f2.update_layout(yaxis2={'domain':(.55,1),'overlaying':'y','side':'right'})
f2.update_layout(yaxis3 = {'domain':(0,.43),'title':'GC/SI Ratio'})
f4 = go.Figure(data=f4_traces,layout=f2.layout)
f4.update_layout(
    title={
            'text': f"Gold and Silver Futures Prices vs Gold/Silver Ratio ",
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'}
)

### Use ETF prices to compare the price changes in Gold and Silver to the Gold/Silver Ratio

In [48]:
def _gtetf(c,divisor=1):
    dt_end = datetime.datetime.now()
    dt_beg = dt_end - datetime.timedelta(365*10)
    df = fetch_history(c,dt_beg,dt_end)
    df = df[['trade_date','nav']]
    df = df.rename(columns={'nav':c})
    df[c] = df[c]/divisor
    return df
df_si = _gtetf('SLV')
df_gc = _gtetf('GLD')
df_ief = _gtetf('IEF')
df_both = df_gc.copy()
df_both = df_both.merge(df_si,on='trade_date',how='inner')
df_both['ratio'] = df_both.GLD / df_both.SLV

f1 = ju.plotly_plot(df_in=df_both[['trade_date','ratio']],x_column='trade_date',
                    figsize=(800,500))
f2 = ju.plotly_plot(df_in=df_both[['trade_date','GLD','SLV']],x_column='trade_date',
                    yaxis2_cols=['SLV'],y_left_label='GLD price',y_right_label='SLV price',
                    figsize=(800,700))
# f3 = ju.plotly_plot(df_in=df_ief[['trade_date','IEF']],x_column='trade_date',
#                     figsize=(800,500))

f1.data[0].yaxis = 'y3'
f4_traces = [f1.data[0],f2.data[0],f2.data[1]]
f2.update_layout(yaxis={'domain':(.57,1)})
f2.update_layout(yaxis2={'domain':(.55,1),'overlaying':'y','side':'right'})
f2.update_layout(yaxis3 = {'domain':(0,.43),'title':'GLD/SLV Ratio'})
f4 = go.Figure(data=f4_traces,layout=f2.layout)
f4.update_layout(
    title={
            'text': f"Gold and Silver ETF Prices vs Gold/Silver Ratio ",
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'}
)

In [49]:
def _gtcommod2(s):
    df = get_barchart_commod(s[:2],year=s[-2:],month=s[2])
    df = df[['trade_date','nav']]
    df = df.rename(columns={'nav':s})
    return df.iloc[-100:]
df_cln20 = _gtcommod2('CLN20')
df_clm21 = _gtcommod2('CLM21')
df_both = df_cln20.merge(df_clm21,on='trade_date',how='inner')
df_both['spr'] = df_both.apply(lambda r:r.CLM21 - r.CLN20,axis=1)
df_uso = _gtetf('USO')[-100:]
df_both = df_both.merge(df_uso,on='trade_date',how='inner')
f1 = ju.plotly_plot(df_in=df_both[['trade_date','spr','USO']],x_column='trade_date',
                    yaxis2_cols=['USO'],figsize=(800,500))
f2 = ju.plotly_plot(df_in=df_both[['trade_date','CLN20','CLM21']],x_column='trade_date',
                    y_left_label='Futures Price',
#                     yaxis2_cols=['CLM21'],y_left_label='CLN20 price',y_right_label='CLM21 price',
                    figsize=(800,600))

f1.data[0].yaxis = 'y3'
f1.data[1].yaxis = 'y4'
f4_traces = [f1.data[0],f1.data[1],f2.data[0],f2.data[1]]
f2.update_layout(yaxis={'domain':(.57,1)})
f2.update_layout(yaxis2={'domain':(.57,1),'overlaying':'y','side':'right'})
f2.update_layout(yaxis3 = {'domain':(0,.43),'title':'CLN20/CLM21 Spread'})
f2.update_layout(yaxis4 = {'domain':(0,.43),'title':'USO price','overlaying':'y','side':'right'})
f4 = go.Figure(data=f4_traces,layout=f2.layout)
f4.update_layout(
    title={
            'text': f"""CLN20 and CLM21 Futures Prices <br>vs<br>CLN20/CLM21 Spread""",
            'y':.93,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font':{'size':14}
    },
    legend = {'x':1,'y':1.2},
    modebar={'orientation': 'v','bgcolor':'grey'}
)

In [50]:
df_both

Unnamed: 0,trade_date,CLN20,CLM21,spr,USO
0,20191231,58.87,54.44,-4.43,102.480003
1,20200102,59.25,54.88,-4.37,102.480003
2,20200103,60.83,55.46,-5.37,105.440002
3,20200106,61.06,55.63,-5.43,105.279999
4,20200107,60.76,55.69,-5.07,105.040001
...,...,...,...,...,...
94,20200515,29.52,34.01,4.49,22.389999
95,20200518,31.65,35.93,4.28,24.309999
96,20200519,31.96,35.94,3.98,24.440001
97,20200520,33.49,36.65,3.16,25.340000


In [159]:
def plotly_subplots(df,df_yaxis_placement,num_ticks_to_display=20,title=""):
    rows = int(df_yaxis_placement['row'].max())
    cols = int(df_yaxis_placement['col'].max())
    specs = [[{"secondary_y": True} for i in range(cols)] for _ in range(rows)]
    fig = make_subplots(rows=rows, cols=cols,
                    specs=specs,shared_xaxes=True)

    df_yp = df_yaxis_placement.copy()
    df_yp['row'].fillna(1)
    df_yp['col'].fillna(1)
    df_yp.is_secondary.fillna(False)
    df_yp.yaxis_title.fillna('')
    
    # add traces
    for i in range(len(df_yp)):
        r = df_yp.iloc[i]
        td = df[r.x_column].values
        nt = len(df)-1 if num_ticks_to_display > len(df) else num_ticks_to_display
        spacing = len(td)//nt
        tdvals = td[::spacing]
        tdtext = tdvals
#         if x_value_labels is not None:
#             tdtext = [x_value_labels[i] for i in tdvals]
        x = td
        y = df[r['name']].values
        row = int(r['row'])
        col = int(r['col'])
        is_secondary = r.is_secondary
        yaxis_title = r.yaxis_title
        name=r['name']
        fig.add_trace(
            go.Scatter(x=x, y=y, name=name),
            row=row, col=col, secondary_y=is_secondary)
        fig.update_xaxes(
            ticktext=tdtext,
            tickvals=tdvals,
            tickangle=45,
            type='category', row=row, col=col)
        fig.update_yaxes(title_text=yaxis_title, row=row, col=col)  
    
    fig.update_layout(
        title={
            'text': title,
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
    )

    figdictstring = fig.to_json()
    figdict = json.loads(figdictstring)
    for i in range(len(figdict['data'])):
        figdict['layout'][f"yaxis{str(i) if i>1 else ''}"]['title'] = df_yp.iloc[0].yaxis_title
    return figdict



In [161]:
names = df_both.columns.values[1:]
x_columns = ['trade_date' for _ in range(len(names))]
yp_rows = [1,1,2,2]
yp_cols = [1,1,1,1]
yp_secondary = [False,False,False,True]
yp_yaxis_titles = ['Futures Price','Futures Price','Spread Price','USO Price']
df_yp = pd.DataFrame({'name':names,'x_column':x_columns,
                      'row':yp_rows,'col':yp_cols,'is_secondary':yp_secondary,
                     'yaxis_title':yp_yaxis_titles})

fig =  plotly_subplots(df_both,df_yp,title="Correlation Between Crude Spreads and USO")
iplot(fig)

In [162]:
td = 'trade_date'
fp = 'Futures Price'
cln20 = ['CLN20',td,1,1,False,fp]#,'x','y','y']
clm21 = ['CLM21',td,1,1,False,fp]#,'x1']
spr = ['spr',td,2,1,False,'Spread Price']
uso = ['USO',td,2,1,True,'USO Price']
df_yp = pd.DataFrame([cln20,clm21,spr,uso],
                    columns=['name','x_column','row','col','is_secondary','yaxis_title'])#,'xaxis','yaxis_left','yaxis_right'])
fig2 =  plotly_subplots(df_both,df_yp,title="Correlation Between Crude Spreads and USO",
                      num_ticks_to_display=15)
# fig2['layout']['xaxis']['anchor'] = 'y3'
iplot(fig2)

In [164]:
td = 'trade_date'
fp = 'Futures Price'
cln20 = ['CLN20',td,1,1,False,"CLN20 Price",'x','y','y']
clm21 = ['CLM21',td,1,2,False,'CLM21 Price']
spr = ['spr',td,2,1,False,'Spread Price']
uso = ['USO',td,2,2,False,'USO Price']
df_yp = pd.DataFrame([cln20,clm21,spr,uso],
                    columns=['name','x_column','row','col','is_secondary','yaxis_title','xaxis','yaxis_left','yaxis_right'])
fig2 =  plotly_subplots(df_both,df_yp,title="Correlation Between Crude Spreads and USO",
                      num_ticks_to_display=10)
# fig2['layout']['xaxis']['anchor'] = 'y3'
iplot(fig2)

In [114]:
def print_axis_info(ff):
    display(ff['layout'].keys())
    xsd = lambda k,j:None if j not in ff['layout'][k] else ff['layout'][k][j]
    xs = [(k,[xsd(k,j) for j in ['anchor','domain','type','title']]) for k in ff['layout'].keys() if 'xaxis' in k]
    display(xs)
    ysd = lambda k,j:None if j not in ff['layout'][k] else ff['layout'][k][j]
    # ys = [(k,fig2['layout'][k]) for k in fig2['layout'].keys() if 'yaxis' in k]
    ys = [(k,[ysd(k,j) for j in ['anchor','domain','overlaying','title']]) for k in ff['layout'].keys() if 'yaxis' in k]
    display(ys)

In [115]:
display(print_axis_info(fig))
display(print_axis_info(fig2))

dict_keys(['template', 'title', 'xaxis', 'xaxis2', 'yaxis', 'yaxis2', 'yaxis3', 'yaxis4'])

[('xaxis', ['y', [0.0, 0.94], 'category', None]),
 ('xaxis2', ['y3', [0.0, 0.94], 'category', None])]

[('yaxis', ['x', [0.575, 1.0], None, {'text': 'Futures Price'}]),
 ('yaxis2', ['x', None, 'y', {'text': 'Futures Price'}]),
 ('yaxis3', ['x2', [0.0, 0.425], None, {'text': 'USO Price'}]),
 ('yaxis4', ['x2', None, 'y3', {'text': 'USO Price'}])]

None

dict_keys(['template', 'title', 'xaxis', 'xaxis2', 'yaxis', 'yaxis2', 'yaxis3', 'yaxis4'])

[('xaxis', ['y', [0.0, 0.94], 'category', None]),
 ('xaxis2', ['y3', [0.0, 0.94], 'category', None])]

[('yaxis', ['x', [0.575, 1.0], None, {'text': 'Futures Price'}]),
 ('yaxis2', ['x', None, 'y', {'text': 'Futures Price'}]),
 ('yaxis3', ['x2', [0.0, 0.425], None, {'text': 'USO Price'}]),
 ('yaxis4', ['x2', None, 'y3', {'text': 'USO Price'}])]

None

In [116]:
[(i,[fig2['data'][i][j] for j in ['name','type','xaxis','yaxis']]) for i in range(len(fig2['data']))]

[(0, ['CLN20', 'scatter', 'x', 'y']),
 (1, ['CLM21', 'scatter', 'x', 'y']),
 (2, ['spr', 'scatter', 'x2', 'y3']),
 (3, ['USO', 'scatter', 'x2', 'y4'])]

for data, you have each graph object, which has the properties (at least for Scatter)
  'name','type','x','y','xaxis','yaxis' properties
for layout you have
  'template', 'title', 'xaxis', 'xaxis2', 'yaxis', 'yaxis2', 'yaxis3', 'yaxis4' properties
for each yaxis, you have 
  'anchor','domain','type','title' properties
for each yaxis, you have 
  'anchor','domain','overlaying','title' properties

In [None]:
{
    'data':[
        {
            'name':_name,
            'type':_type,
            'x':x,
            'y':y,
            'xaxis':xaxis,
            'yaxis':yaxis
        },...
    ],
    'layout':
}

In [135]:
display(fig2['layout']['template']['data'].keys())
display(fig2['layout']['template']['layout'].keys())

dict_keys(['bar', 'barpolar', 'carpet', 'choropleth', 'contour', 'contourcarpet', 'heatmap', 'heatmapgl', 'histogram', 'histogram2d', 'histogram2dcontour', 'mesh3d', 'parcoords', 'pie', 'scatter', 'scatter3d', 'scattercarpet', 'scattergeo', 'scattergl', 'scattermapbox', 'scatterpolar', 'scatterpolargl', 'scatterternary', 'surface', 'table'])

dict_keys(['annotationdefaults', 'coloraxis', 'colorscale', 'colorway', 'font', 'geo', 'hoverlabel', 'hovermode', 'mapbox', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'shapedefaults', 'ternary', 'title', 'xaxis', 'yaxis'])

In [155]:
[{'title':{'text':f"yaxis{str(i) if i>1 else ''}"}} for i in range(4)]
# fig['layout']['yaxis']['title']
[fig2['layout'][f"yaxis{str(i) if i>1 else ''}"]['title'] for i in range(4)]

[{'text': 'Futures Price'},
 {'text': 'Futures Price'},
 {'text': 'Futures Price'},
 {'text': 'USO Price'}]

### END