___
# Merge csv files for ETF shares, CME Open Interest, and CFTC COT reports.
___

In [1]:
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import os, sys
import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import iplot
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)

import zipfile
import urllib.request
from PIL import Image
import jupyter_utilities as ju
import importlib
import pandasql as psql

# Make important folders
TEMP_FOLDER = './temp_folder'
try:
    os.mkdir(TEMP_FOLDER)
except:
    pass
SAVE_IMAGE_FOLDER = f'{TEMP_FOLDER}/gold'
try:
    os.mkdir(SAVE_IMAGE_FOLDER)
except:
    pass

def to_int(s):
    try:
        return int(float(str(s)))
    except:
        print(f'to_int exception on value:{s}')
        return None

pd.set_option('display.max_colwidth',1000)
if os.path.abspath('../')  not in sys.path:
    if '.' not in sys.path:
        sys.path.append(os.path.abspath('../'))
import barchart_api as bcapi

    
cme_csv_save_folder = './cme_oi_data'
cot_data_path = './cot_history.csv'
etf_data_path = './etf_cap_hist.csv'

In [2]:
importlib.reload(ju)

<module 'jupyter_utilities' from '/Users/bperlman1/Documents/billybyte/pyliverisk/jupyter_notebooks/cme_open_interest/jupyter_utilities.py'>

### Define commodity and etf identifiers in the csv files

In [3]:
OI_ID_GOLD = 'GOLD FUTURES'
OI_ID_SILVER = 'SILVER FUTURES'
OI_ID_CL = 'CRUDE OIL LIGHT SWEET FUTURES'
OI_ID_10Y = '10Y NOTE FUTURE'
OI_ID_30Y = '30Y BOND FUTURE'
OI_ID_SPY = 'E-MINI S&P 500 FUTURE'
OI_ID_SOYB = 'SOYBEAN FUTURE'
OI_ID_CORN = 'CORN FUTURE'
OI_ID_COTTON = 'COTTON FUTURES'
OI_ID_WHEAT = 'CHICAGO SRW WHEAT FUTURE'
OI_ID_COTTON = 'COTTON FUTURES'
OI_ID_EURO = 'EURO FX FUTURE'
OI_ID_BP = 'BRITISH POUND FUTURE'
OI_ID_CD = 'CANADIAN DOLLAR FUTURE'
OI_ID_ED = 'EURODOLLAR FUTURE'
OI_ID_HG = 'HIGH GRADE COPPER FUTURES'

COT_ID_GOLD= 'GOLD - COMMODITY EXCHANGE INC.'
COT_ID_SILVER= 'SILVER - COMMODITY EXCHANGE INC.'
# COT_ID_CL = 'CRUDE OIL, LIGHT SWEET'
COT_ID_CL = 'CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE'
COT_ID_10Y = '10-YEAR U.S. TREASURY NOTES - CHICAGO BOARD OF TRADE'
COT_ID_30Y = 'U.S. TREASURY BONDS - CHICAGO BOARD OF TRADE'
COT_ID_SPY = 'E-MINI S&P 500 STOCK INDEX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_SOYB = 'SOYBEANS - CHICAGO BOARD OF TRADE'
# COT_ID_WHEAT = 'WHEAT - CHICAGO BOARD OF TRADE'
COT_ID_COTTON = 'COTTON NO. 2 - ICE FUTURES U.S.'
COT_ID_WHEAT = 'WHEAT-SRW - CHICAGO BOARD OF TRADE'
COT_ID_CORN = 'CORN - CHICAGO BOARD OF TRADE'
COT_ID_EURO = 'EURO FX - CHICAGO MERCANTILE EXCHANGE'
COT_ID_BP = 'BRITISH POUND STERLING - CHICAGO MERCANTILE EXCHANGE'
COT_ID_CD = 'CANADIAN DOLLAR - CHICAGO MERCANTILE EXCHANGE'
COT_ID_ED = '3-MONTH EURODOLLARS - CHICAGO MERCANTILE EXCHANGE'
COT_ID_HG = 'COPPER-GRADE #1 - COMMODITY EXCHANGE INC.'

ETF_ID_GOLD = 'GLD'
ETF_ID_SILVER = 'SLV'
ETF_ID_CL = 'USO'
ETF_ID_10Y = 'AGG'
ETF_ID_30Y = 'TLT'
ETF_ID_SPY = 'SPY'
ETF_ID_COTTON = 'BAL'
ETF_ID_SOYB = 'SOYB'
ETF_ID_WHEAT = 'WEAT'
ETF_ID_CORN = 'CORN'
ETF_ID_EURO = 'FXE'
ETF_ID_BP = 'FXB'
ETF_ID_CD = 'FXC'
ETF_ID_ED = 'BSV'
ETF_ID_HG = 'CPER'

ETF_SHARES_DIVISOR_GOLD = 1000
ETF_SHARES_DIVISOR_SILVER = 5000
ETF_SHARES_DIVISOR_10Y = 1000
ETF_SHARES_DIVISOR_30Y = 1000
ETF_SHARES_DIVISOR_SPY = 500
ETF_SHARES_DIVISOR_COTTON = 50000
ETF_SHARES_DIVISOR_SOYB = 5000
ETF_SHARES_DIVISOR_WHEAT = 5000
ETF_SHARES_DIVISOR_CORN = 5000
ETF_SHARES_DIVISOR_EURO = 1000
ETF_SHARES_DIVISOR_BP = 1000
ETF_SHARES_DIVISOR_CD = 1000
ETF_SHARES_DIVISOR_CL = 4000
ETF_SHARES_DIVISOR_ED = 1000000
ETF_SHARES_DIVISOR_HG = 2500


ID_DICT = {
    'gold':{'OI':OI_ID_GOLD,'COT':COT_ID_GOLD,'ETF':ETF_ID_GOLD,'ETF_DIVISOR':ETF_SHARES_DIVISOR_GOLD},
    'silver':{'OI':OI_ID_SILVER,'COT':COT_ID_SILVER,'ETF':ETF_ID_SILVER,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SILVER},
    '10Y':{'OI':OI_ID_10Y,'COT':COT_ID_10Y,'ETF':ETF_ID_10Y,'ETF_DIVISOR':ETF_SHARES_DIVISOR_10Y},
    '30Y':{'OI':OI_ID_30Y,'COT':COT_ID_30Y,'ETF':ETF_ID_30Y,'ETF_DIVISOR':ETF_SHARES_DIVISOR_30Y},
    'spy':{'OI':OI_ID_SPY,'COT':COT_ID_SPY,'ETF':ETF_ID_SPY,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SPY},
    'cotton':{'OI':OI_ID_COTTON,'COT':COT_ID_COTTON,'ETF':ETF_ID_COTTON,'ETF_DIVISOR':ETF_SHARES_DIVISOR_COTTON},
    'soyb':{'OI':OI_ID_SOYB,'COT':COT_ID_SOYB,'ETF':ETF_ID_SOYB,'ETF_DIVISOR':ETF_SHARES_DIVISOR_SOYB},
    'wheat':{'OI':OI_ID_WHEAT,'COT':COT_ID_WHEAT,'ETF':ETF_ID_WHEAT,'ETF_DIVISOR':ETF_SHARES_DIVISOR_WHEAT},
    'corn':{'OI':OI_ID_CORN,'COT':COT_ID_CORN,'ETF':ETF_ID_CORN,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CORN},
    'euro':{'OI':OI_ID_EURO,'COT':COT_ID_EURO,'ETF':ETF_ID_EURO,'ETF_DIVISOR':ETF_SHARES_DIVISOR_EURO},
    'bp':{'OI':OI_ID_BP,'COT':COT_ID_BP,'ETF':ETF_ID_BP,'ETF_DIVISOR':ETF_SHARES_DIVISOR_BP},
    'cd':{'OI':OI_ID_CD,'COT':COT_ID_CD,'ETF':ETF_ID_CD,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CD},
    'cl':{'OI':OI_ID_CL,'COT':COT_ID_CL,'ETF':ETF_ID_CL,'ETF_DIVISOR':ETF_SHARES_DIVISOR_CL},
    'ed':{'OI':OI_ID_ED,'COT':COT_ID_ED,'ETF':ETF_ID_ED,'ETF_DIVISOR':ETF_SHARES_DIVISOR_ED},
    'hg':{'OI':OI_ID_HG,'COT':COT_ID_HG,'ETF':ETF_ID_HG,'ETF_DIVISOR':ETF_SHARES_DIVISOR_HG},
}


___
## Define help access routines
___

In [4]:
# COT helpers
def df_cot_by_name(dict_id='cl',df_cot=None):
    dfc = df_cot2 if df_cot is None else df_cot
    cot_id = ID_DICT[dict_id]['COT']
    return dfc[dfc.Market_and_Exchange_Names==cot_id]


___
### Get cme open interest, COT and ETF data from csv files
___

In [5]:
# df_soyb = fetch_history(ETF_ID_SOYB,df_etf.date.min(),df_etf.date.max())
# print(df_etf.tail())
# print(df_soyb.tail())
# df_etf = df_etf.append(df_soyb,ignore_index=True)

In [6]:
df_oi = None
years = np.linspace(2013,2019,7,dtype=int)
df_oi = None
for y in years:
    df_temp = pd.read_csv(f'{cme_csv_save_folder}/cme_open_interest_{y}.csv')
    df_temp = df_temp[~df_temp.Open_Interest.isnull()]
    if df_oi is None:
        df_oi = df_temp.copy()
    else:
        df_oi = df_oi.append(df_temp,ignore_index=True)
        df_oi.index = list(range(len(df_oi)))
df_oi.Open_Interest = df_oi.Open_Interest.apply(to_int)
df_oi.Total_Volume = df_oi.Total_Volume.apply(to_int)
df_oi = df_oi[~df_oi.Total_Volume.isnull()]
print(f'oi length:{len(df_oi)}')
df_etf = pd.read_csv(etf_data_path)
df_etf['trade_date'] = df_etf.date.apply(ju.str_to_yyyymmdd)
print(f'etf length:{len(df_etf)}')
df_cot2 = pd.read_csv(cot_data_path)
df_cot2.As_of_Date_in_Form_YYYY_MM_DD = df_cot2.As_of_Date_in_Form_YYYY_MM_DD.apply(ju.str_to_date)
df_cot2.Market_and_Exchange_Names = df_cot2.Market_and_Exchange_Names.str.strip() 
print(f'cot length:{len(df_cot2)}')



Columns (3,6,7,8,9,10,12) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.



to_int exception on value:**Total
Volume
to_int exception on value:ExPit
Volume
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on value:nan
to_int exception on 

### For some of the ETF's, get the data from yahoo, and ignore the shares data

In [7]:
def fetch_history(symbol,dt_beg,dt_end):
    df = pdr.DataReader(symbol, 'yahoo', dt_beg, dt_end)
    df['date'] = df.index
    df.date = df.date.apply(lambda d: str(d)[0:4] + "-" + str(d)[5:7] + "-" + str(d)[8:10])
    df['trade_date'] = df.date.apply(lambda d: int(str(d)[0:4] + str(d)[5:7] + str(d)[8:10]))
    df = df.sort_values('date')
    df.index = list(range(len(df)))
    # make adj close the close
    df['nav'] = df['Adj Close']
    df['symbol'] = symbol
    df['shares'] = 0
    df = df[['symbol','date','nav','shares','trade_date']]
    return df

    

### Find identifier strings for specific Open Interest and COT rows in their respective DataFrames
1. Enter values for oi_key_word, cot_key_word and etf_key_word below
2. Choose the product/market_and_exchange_name/symbol that has the highest open_interest or volume

In [8]:
import pdb
def create_merged_df(commod_to_use):
    global df_oi,df_etf,df_cot2
    # step 1 set up ID's
    OI_ID = ID_DICT[commod_to_use]['OI']
    COT_ID = ID_DICT[commod_to_use]['COT']
    ETF_ID = ID_DICT[commod_to_use]['ETF']
    ETF_DIVISOR = ID_DICT[commod_to_use]['ETF_DIVISOR']
    #Step 2: make sure ID's produce only one contract
    oi_key_word = OI_ID.lower()
    cot_key_word = COT_ID.lower()
    etf_key_word = ETF_ID.lower()

    l = (list(filter(lambda s: oi_key_word == str(s).lower(),df_oi.Product_Description.unique())))
    df_oi_sub = df_oi[df_oi.Product_Description.isin(l)][['Product_Description','Open_Interest']]
    df_oi_gb = df_oi_sub.groupby('Product_Description',as_index=False).sum()

    l = (list(filter(lambda s: cot_key_word == str(s).lower(),df_cot2.Market_and_Exchange_Names.unique())))
    df_cot_single = df_cot2[df_cot2.Market_and_Exchange_Names==COT_ID]
    df_cot_sub = df_cot2[df_cot2.Market_and_Exchange_Names.isin(l)][['Market_and_Exchange_Names','Open_Interest_All']]
    df_cot_gb = df_cot_sub.groupby('Market_and_Exchange_Names',as_index=False).sum()

    l = (list(filter(lambda s: etf_key_word == str(s).lower(),df_etf.symbol.unique())))
#     if len(l) > 0:
#         df_etf_sub = df_etf[df_etf.symbol.isin(l)][['symbol','shares']]
#     else:
#         # fetch from yahoo
#         df_etf_sub = fetch_history(ETF_ID,df_etf.date.min(),df_etf.date.max())
#         df_etf_sub.date
#         df_etf = df_etf.append(df_etf_sub,ignore_index=True)        
        
    # fetch from yahoo
#     df_etf_sub = fetch_history(ETF_ID,df_etf.date.min(),df_etf.date.max())
#     df_etf_gb = df_etf_sub.groupby('symbol',as_index=False).sum()
    dtmin = str(df_cot_single.As_of_Date_in_Form_YYYY_MM_DD.min())[0:10]
#     dtmax = str(df_cot_single.As_of_Date_in_Form_YYYY_MM_DD.max())[0:10]
    dtmax = str(datetime.datetime.now())[0:10]
    df_etf_single = fetch_history(ETF_ID,dtmin,dtmax)
    df_etf_gb = df_etf_single.groupby('symbol',as_index=False).sum()
    

    if len(df_oi_gb) + len(df_cot_gb) + len(df_etf_gb) == 3:
        print(f"all ID's for commodity: {commod_to_use} are OK")
    else:
        print(f"!!!!! ALL ID's FOR COMMODITY: {commod_to_use} ARE NOT OK!!!!!!")
        print(f'oi is OK: {len(df_oi_gb)==1}')
        print(f'cot is OK: {len(df_cot_gb)==1}')
        print(f'etf is OK: {len(df_etf_gb)==1}')
        raise ValueError(f'ambiguous ID name {OI_ID}')
    
    
    # Step 3: merge oi, cot and etf stuff
    df_oi_single = df_oi[df_oi.Product_Description == OI_ID][['trade_date','Open_Interest','Total_Volume']]
    df_oi_single.Open_Interest = df_oi_single.Open_Interest.apply(to_int)
    df_oi_single.Total_Volume = df_oi_single.Total_Volume.astype(float).astype(int)
#     df_etf_single = df_etf[df_etf.symbol==ETF_ID]

    basic_cols = ['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD','Open_Interest_All']
    long_cols = ['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD',
                'Noncommercial_Positions_Long_All','Commercial_Positions_Long_All',
                'Nonreportable_Positions_Long_All','Traders_Commercial_Long_All',
                 'Traders_Noncommercial_Long_All','Traders_Total_Reportable_Long_All']
    short_cols = ['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD',
                'Noncommercial_Positions_Short_All','Commercial_Positions_Short_All',
                'Nonreportable_Positions_Short_All','Total_Reportable_Positions_Short_All',
                'Traders_Commercial_Short_All','Traders_Noncommercial_Short_All',
                'Traders_Total_Reportable_Short_All']
    df_commod_long = df_cot_single[long_cols]
    df_commod_short = df_cot_single[short_cols]

    def non_comm_ratio(r):
        if r.Noncommercial_Positions_Short_All==0:
            return 0.0
        return float(r.Noncommercial_Positions_Long_All)/float(r.Noncommercial_Positions_Short_All)
    def non_comm_net(r):
        return float(r.Noncommercial_Positions_Long_All) - float(r.Noncommercial_Positions_Short_All)
    def comm_net(r):
        return float(r.Commercial_Positions_Long_All) - float(r.Commercial_Positions_Short_All)
    def non_report_net(r):
        return float(r.Nonreportable_Positions_Long_All) - float(r.Nonreportable_Positions_Short_All)
    def traders_comm_net(r):
        return float(r.Traders_Commercial_Long_All) - float(r.Traders_Commercial_Short_All)
    def traders_noncomm_net(r):
        return float(r.Traders_Noncommercial_Long_All) - float(r.Traders_Noncommercial_Short_All)

    df_commod_net = df_commod_long.merge(df_commod_short,how='inner',on=['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD'])
    df_commod_net['Noncommercial_Positions_Ratio_All'] = df_commod_net.apply(non_comm_ratio,axis=1)
    df_commod_net['Noncommercial_Positions_Net_All'] = df_commod_net.apply(non_comm_net,axis=1)
    df_commod_net['Commercial_Positions_Net_All'] = df_commod_net.apply(comm_net,axis=1)
    df_commod_net['Nonreportable_Positions_Net_All'] = df_commod_net.apply(non_report_net,axis=1)
    df_commod_net['Traders_Commercial_Net_All'] = df_commod_net.apply(traders_comm_net,axis=1)
    df_commod_net['Traders_Noncommercial_Net_All'] = df_commod_net.apply(traders_noncomm_net,axis=1)
    net_cols = ['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD','Noncommercial_Positions_Net_All','Noncommercial_Positions_Ratio_All','Commercial_Positions_Net_All','Nonreportable_Positions_Net_All','Traders_Commercial_Net_All','Traders_Noncommercial_Net_All']
    df_commod_net = df_commod_net[net_cols]
    df_commod_net['cot_yyyymmdd'] = df_commod_net.As_of_Date_in_Form_YYYY_MM_DD.apply(ju.str_to_yyyymmdd)
    df_commod_net = df_commod_net.sort_values('cot_yyyymmdd')
    df_commod_net.index = list(range(len(df_commod_net)))

    last_date = ju.str_to_date(str(df_commod_net.iloc[-1].cot_yyyymmdd),sep='') + datetime.timedelta(7)
    last_date_yyyymmdd = ju.str_to_yyyymmdd(last_date)
    df_commod_net['next_cot_yyyymmdd'] = list(df_commod_net[1:].cot_yyyymmdd) + [last_date_yyyymmdd]
    cols_to_change = {'Noncommercial_Positions_Ratio_All':'noncommratio','Noncommercial_Positions_Net_All':'noncomm',
                      'Commercial_Positions_Net_All':'comm',
                      'Nonreportable_Positions_Net_All':'nonrep','Traders_Commercial_Net_All':'trade_com',
                     'Traders_Noncommercial_Net_All':'trade_noncomm'}
    df_commod_net = df_commod_net.rename(columns=cols_to_change)
    df_etf_oi = df_etf_single[['trade_date','nav','shares']].merge(df_oi_single,how='inner',on='trade_date')
    df_etf_oi['nav_diff'] = df_etf_oi.nav.pct_change()
    df_etf_oi['share_diff'] = df_etf_oi.shares.pct_change()
    df_etf_oi['oi_diff'] = df_etf_oi.Open_Interest.pct_change()
    q = f"select * from df_etf_oi inner join df_commod_net on df_etf_oi.trade_date >= df_commod_net.cot_yyyymmdd and df_etf_oi.trade_date < df_commod_net.next_cot_yyyymmdd"
    df_etf_oi_cot =  psql.sqldf(q, locals())
    
    etf_oi_cols = list(df_etf_oi.columns.values)
    cot_cols = list(cols_to_change.values()) + ['cot_yyyymmdd','next_cot_yyyymmdd']
    etf_oi_cot_cols = etf_oi_cols + cot_cols
    df_etf_oi_cot = df_etf_oi_cot[etf_oi_cot_cols]

    # step 4: create final
    df_final = df_etf_oi_cot[['trade_date','nav','noncomm','noncommratio','comm']][-1000:]
    df_final = df_final.loc[:,~df_final.columns.duplicated()]
    return df_final


### Create multi plot of all data commodities

In [9]:
dict_df = {}
last_n_days = 1000
for k in ID_DICT.keys():
    print(f'processing {k}')
    df = create_merged_df(k)
    dict_df[k] = df.iloc[-last_n_days:]


processing gold
all ID's for commodity: gold are OK
processing silver
all ID's for commodity: silver are OK
processing 10Y
all ID's for commodity: 10Y are OK
processing 30Y
all ID's for commodity: 30Y are OK
processing spy
all ID's for commodity: spy are OK
processing cotton
all ID's for commodity: cotton are OK
processing soyb
all ID's for commodity: soyb are OK
processing wheat
all ID's for commodity: wheat are OK
processing corn
all ID's for commodity: corn are OK
processing euro
all ID's for commodity: euro are OK
processing bp
all ID's for commodity: bp are OK
processing cd
all ID's for commodity: cd are OK
processing cl
all ID's for commodity: cl are OK
processing ed
all ID's for commodity: ed are OK
processing hg
all ID's for commodity: hg are OK


In [10]:
dict_df.keys()
# dict_df['cl'].columns.values

dict_keys(['gold', 'silver', '10Y', '30Y', 'spy', 'cotton', 'soyb', 'wheat', 'corn', 'euro', 'bp', 'cd', 'cl', 'ed', 'hg'])

___
### Plot each df in dict_df using plotly
___

In [11]:
# s= str(fig.to_plotly_json()).replace("'",'"')
# open('/users/bperlman1/mydata4.json','w').write(s)

In [12]:
df_all_in = None
for k in dict_df.keys():
    df_in = dict_df[k]
    if len(df_in)>1:
        df_in_with_commod = df_in.copy()
        df_in_with_commod['symbol'] = k
        if df_all_in is None:
            df_all_in = df_in_with_commod.copy()
        else:
            df_all_in = df_all_in.append(df_in_with_commod,ignore_index=True)
    last_date = df_in.trade_date.max()
    xc = 'trade_date'
    plot_title = f'{k} last trade date {last_date}'
    y_left_label = 'y nav'
    y_right_label = 'noncomm'
    
    try:
        fig = ju.plotly_pandas(df_in[['trade_date', 'nav', y_right_label]],x_column=xc,plot_title=plot_title,y_left_label=y_left_label,y_right_label=y_right_label)
        iplot(fig)
    except Exception as e:
        print(f'{k} {str(e)}')
df_all_in.to_csv(f'{TEMP_FOLDER}/df_all_in.csv',index=False)



Bummer! Plotly can currently only draw Line2D objects from matplotlib that are in 'data' coordinates!


I found a path object that I don't think is part of a bar chart. Ignoring.



In [13]:
# this is the old "non plotly" plotting routine
# ju.multi_df_plot(dict_df=dict_df,x_column='trade_date',num_of_x_ticks=40,save_file_prefix='cot_nav_plot',save_image_folder='./temp_folder/saved_images')    


___
### Try various strategies based on above charts
___

In [14]:
field_to_chart = 'nav'
for sym in dict_df.keys():
    df_all_in = dict_df[sym].copy()    
    df_all_in['noncomm_prev'] = df_all_in.noncomm.shift(1)
    df_all_in['is_transition'] = df_all_in.apply(lambda r: 1 if r.noncomm / r.noncomm_prev < 0 else 0,axis=1)
    df_all_in['trans_sign'] = df_all_in.apply(lambda r: 1 if r.noncomm - r.noncomm_prev > 0 else -1,axis=1)
    df_all_in['transition'] = df_all_in.is_transition * df_all_in.trans_sign
    df_all_in2 = df_all_in[['trade_date',field_to_chart,'transition']]
    fig = ju.plotly_pandas(df_all_in2[['trade_date',field_to_chart,'transition']],x_column='trade_date',plot_title=sym)
    iplot(fig)
# len(df_all_in[df_all_in.tran_count==1])/len(df_all_in)

___
### The cells below help you find commodities in df_cot2 and df_oi
___

In [15]:
list(filter(lambda s: str(s)!='nan' and 'CANADIAN' in s and 'FUTURE' in s,list(set(df_oi.Product_Description))))


['CANADIAN HEAVY CRUDE(NET ENRGY) FUTURES', 'CANADIAN DOLLAR FUTURE']

In [16]:
m_and_e_list = list(filter(lambda s: str(s)!='nan' and 'CANADIAN' in s ,list(set(df_cot2.Market_and_Exchange_Names))))
m_and_e_list

['CANADIAN HVY CRUDE NET ENRGY - NEW YORK MERCANTILE EXCHANGE',
 'CANADIAN DOLLAR - INTERNATIONAL MONETARY MARKET',
 'CANADIAN DOLLAR - CHICAGO MERCANTILE EXCHANGE',
 'NORTHWEST PIPELINE - CANADIAN BORDER (BASIS) - ICE FUTURES ENERGY DIV']

In [17]:
[c for c in df_cot2.columns.values if 'date' in c.lower()]

['As_of_Date_in_Form_YYMMDD', 'As_of_Date_in_Form_YYYY_MM_DD']

In [18]:
for m_and_e in m_and_e_list:
    oi = df_cot2[df_cot2.Market_and_Exchange_Names==m_and_e].Open_Interest_All.sum()
    ld = df_cot2[df_cot2.Market_and_Exchange_Names==m_and_e].As_of_Date_in_Form_YYYY_MM_DD.max()
    print(f'{m_and_e}, {oi:,}, {str(ld)[0:10]}')
# print(df_cot2[df_cot2.Market_and_Exchange_Names=='POUND STERLING - INTERNATIONAL MONETARY MARKET'].Open_Interest_All.sum())
# print(df_cot2[df_cot2.Market_and_Exchange_Names=='POUND STERLING - CHICAGO MERCANTILE EXCHANGE'].Open_Interest_All.sum())



CANADIAN HVY CRUDE NET ENRGY - NEW YORK MERCANTILE EXCHANGE, 2,644,853, 2017-08-15
CANADIAN DOLLAR - INTERNATIONAL MONETARY MARKET, 2,254,680, 2000-08-22
CANADIAN DOLLAR - CHICAGO MERCANTILE EXCHANGE, 112,671,342, 2019-08-13
NORTHWEST PIPELINE - CANADIAN BORDER (BASIS) - ICE FUTURES ENERGY DIV, 26,411,301, 2019-08-13


In [19]:
list(filter(lambda s: 'Comm' in s,df_cot2.columns.values))

['%_of_OI_Commercial_Long_All',
 '%_of_OI_Commercial_Long_Old',
 '%_of_OI_Commercial_Long_Other',
 '%_of_OI_Commercial_Short_All',
 '%_of_OI_Commercial_Short_Old',
 '%_of_OI_Commercial_Short_Other',
 'CFTC_Commodity_Code',
 'CFTC_Commodity_Code_Quotes',
 'CFTC_Commodity_Code_Quotes_',
 'Change_in_Commercial_Long_All',
 'Change_in_Commercial_Short_All',
 'Commercial_Positions_Long_All',
 'Commercial_Positions_Long_Old',
 'Commercial_Positions_Long_Other',
 'Commercial_Positions_Short_All',
 'Commercial_Positions_Short_Old',
 'Commercial_Positions_Short_Other',
 'Traders_Commercial_Long_All',
 'Traders_Commercial_Long_Old',
 'Traders_Commercial_Long_Other',
 'Traders_Commercial_Short_All',
 'Traders_Commercial_Short_Old',
 'Traders_Commercial_Short_Other']

In [20]:
df_cot2_cl = df_cot2[df_cot2.Market_and_Exchange_Names=='CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE']
df_cot2_cl = df_cot2_cl[['As_of_Date_in_Form_YYMMDD','Commercial_Positions_Long_All']]
df_cot2_cl = df_cot2_cl.sort_values('As_of_Date_in_Form_YYMMDD')
df_cot2_cl.tail()

Unnamed: 0,As_of_Date_in_Form_YYMMDD,Commercial_Positions_Long_All
27431,190716,718705
27432,190723,710726
27433,190730,732821
27434,190806,760794
27435,190813,715157


## END

In [21]:
comod = 'soyb'
fig = ju.plotly_pandas(dict_df[comod][['trade_date', 'noncomm', 'noncommratio']],x_column=xc,plot_title=comod,y_left_label=y_left_label,y_right_label=y_right_label)
iplot(fig)


In [22]:
def get_cot2(id):
    basic_cols = ['Market_and_Exchange_Names','As_of_Date_in_Form_YYYY_MM_DD','Open_Interest_All']
    long_cols = ['Noncommercial_Positions_Long_All','Commercial_Positions_Long_All',
                'Nonreportable_Positions_Long_All','Traders_Commercial_Long_All',
                 'Traders_Noncommercial_Long_All','Traders_Total_Reportable_Long_All']
    short_cols = ['Noncommercial_Positions_Short_All','Commercial_Positions_Short_All',
                'Nonreportable_Positions_Short_All','Total_Reportable_Positions_Short_All',
                'Traders_Commercial_Short_All','Traders_Noncommercial_Short_All',
                'Traders_Total_Reportable_Short_All']

    df_ret = df_cot2[df_cot2.Market_and_Exchange_Names==ID_DICT[id]['COT']]
    df_ret = df_ret[basic_cols + long_cols + short_cols]
    return df_ret
dfc = get_cot2('cl')


In [23]:
dfc.As_of_Date_in_Form_YYYY_MM_DD.max()

Timestamp('2019-08-13 00:00:00')

In [24]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(dfc[dfc.As_of_Date_in_Form_YYYY_MM_DD=='2019-04-23'].iloc[0])


Market_and_Exchange_Names               CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE
As_of_Date_in_Form_YYYY_MM_DD                                             2019-04-23 00:00:00
Open_Interest_All                                                                     2139213
Noncommercial_Positions_Long_All                                                       644670
Commercial_Positions_Long_All                                                          716970
Nonreportable_Positions_Long_All                                                        96400
Traders_Commercial_Long_All                                                                91
Traders_Noncommercial_Long_All                                                            152
Traders_Total_Reportable_Long_All                                                         309
Noncommercial_Positions_Short_All                                                       97311
Commercial_Positions_Short_All                              

In [25]:
dfc[dfc.Noncommercial_Positions_Short_All==0].Noncommercial_Positions_Short_All



Series([], Name: Noncommercial_Positions_Short_All, dtype: int64)

In [26]:
df_test = pd.DataFrame({'x':np.arange(0,40),'y':np.arange(0,40)*5})

In [27]:
f = ju.plotly_pandas(df_test,x_column='x',bar_plot=False)

In [28]:
f


Figure({
    'data': [{'line': {'color': 'rgba (31, 119, 180, 1)', 'dash': 'solid', 'width': 1.5},
              'mode': 'lines',
              'name': 'y',
              'type': 'scatter',
              'uid': '2304859a-c0ee-11e9-a858-8c859025e7f9',
              'x': [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
                    12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0,
                    22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0,
                    32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0],
              'xaxis': 'x',
              'y': [0.0, 5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0,
                    50.0, 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0,
                    100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 130.0, 135.0, 140.0,
                    145.0, 150.0, 155.0, 160.0, 165.0, 170.0, 175.0, 180.0, 185.0,
                    190.0, 195.0],
              'yaxis': 'y'}],
    'layout': {'title