## Build history of commodity COTs

1. Retrieve data from the CFTC website (www.cft.gov/files);
2. Extract data from the most important Commercial and Non Commercial long and short columns;
3. Create "net" columns for each important category;
4. Merge this data with the ETF history data created in the previous steps


In [1]:
import pandas as pd
import numpy as np
import os, sys,glob
import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
import zipfile
import urllib.request
from PIL import Image
def str_to_date(d):
    try:
        dt = datetime.datetime.strptime(str(d),'%Y-%m-%d')
    except:
        return None
    return dt

# Make important folders
TEMP_FOLDER = './temp_folder'
try:
    os.mkdir(TEMP_FOLDER)
except:
    pass
try:
    os.mkdir(f'{TEMP_FOLDER}/cot')
except:
    pass
try:
    os.mkdir(f'{TEMP_FOLDER}/zip')
except:
    pass


___
## First, decide if you want to re-create the ETF and COT data, or just retrieve the previously saved data DataFrames.

In [2]:
CREATE_COT_DATA = True
cot_save_path = './cot_new_history.csv'
cot_net_save_path = './cot_net_new_history.csv'

### Main column names

In [3]:
basic_cols = ['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD','Open_Interest_All']
long_cols = basic_cols + ['Prod_Merc_Positions_Long_All','Swap_Positions_Long_All',
            'M_Money_Positions_Long_All','Other_Rept_Positions_Long_All',
            'NonRept_Positions_Long_All','Tot_Rept_Positions_Long_All']
short_cols = basic_cols + ['Prod_Merc_Positions_Short_All','Swap_Positions_Short_All',
            'M_Money_Positions_Short_All','Other_Rept_Positions_Short_All',
            'NonRept_Positions_Short_All','Tot_Rept_Positions_Short_All']
summary_types = ['prod','swap','monman','other','nonrep','totrep']
summary_cols_dict = {summary_types[i]:(long_cols[i+len(basic_cols)],short_cols[i+len(basic_cols)]) for i in range(len(summary_types))}


In [4]:
summary_cols_dict

{'prod': ('Prod_Merc_Positions_Long_All', 'Prod_Merc_Positions_Short_All'),
 'swap': ('Swap_Positions_Long_All', 'Swap_Positions_Short_All'),
 'monman': ('M_Money_Positions_Long_All', 'M_Money_Positions_Short_All'),
 'other': ('Other_Rept_Positions_Long_All', 'Other_Rept_Positions_Short_All'),
 'nonrep': ('NonRept_Positions_Long_All', 'NonRept_Positions_Short_All'),
 'totrep': ('Tot_Rept_Positions_Long_All', 'Tot_Rept_Positions_Short_All')}

___
## Process CFTC COT Data
___

### Initial processing
1. Download zip files from www.cft.gov/files;
2. Unip the files using the zipfile package;
3. Read each csv (usually named Annual.TXT), and merge them into the df_cot DataFrame.


In [5]:
zip_download_folder = f'{TEMP_FOLDER}/zip'

In [6]:
f'{zip_download_folder}/*.csv'

'./temp_folder/zip/*.csv'

In [7]:
glob.glob(f'{zip_download_folder}/*')

['./temp_folder/zip/Annual.TXT',
 './temp_folder/zip/annual_2007.txt',
 './temp_folder/zip/dea_fut_xls_2000.0.zip',
 './temp_folder/zip/dea_fut_xls_2001.0.zip',
 './temp_folder/zip/dea_fut_xls_2002.0.zip',
 './temp_folder/zip/dea_fut_xls_2003.0.zip',
 './temp_folder/zip/dea_fut_xls_2004.0.zip',
 './temp_folder/zip/dea_fut_xls_2005.0.zip',
 './temp_folder/zip/dea_fut_xls_2006.0.zip',
 './temp_folder/zip/dea_fut_xls_2007.0.zip',
 './temp_folder/zip/dea_fut_xls_2008.0.zip',
 './temp_folder/zip/dea_fut_xls_2009.0.zip',
 './temp_folder/zip/dea_fut_xls_2010.0.zip',
 './temp_folder/zip/dea_fut_xls_2011.0.zip',
 './temp_folder/zip/dea_fut_xls_2012.0.zip',
 './temp_folder/zip/dea_fut_xls_2013.0.zip',
 './temp_folder/zip/dea_fut_xls_2014.0.zip',
 './temp_folder/zip/dea_fut_xls_2015.0.zip',
 './temp_folder/zip/dea_fut_xls_2016.0.zip',
 './temp_folder/zip/dea_fut_xls_2017.0.zip',
 './temp_folder/zip/dea_fut_xls_2018.0.zip',
 './temp_folder/zip/dea_fut_xls_2019.0.zip',
 './temp_folder/zip/f_year.tx

In [8]:
if CREATE_COT_DATA:
    last_year = datetime.datetime.now().year
    year_list = np.arange(2010,last_year+1)
    zip_download_folder = f'{TEMP_FOLDER}/zip'
    df_cot_temp = None
    df_cot = None
    for y in year_list:
        yint = int(y)
#         url = f"https://www.cftc.gov/files/dea/history/deacot{yint}.zip" 
        url = f'https://www.cftc.gov/files/dea/history/fut_disagg_txt_{yint}.zip'
        path_to_zip_file = f'{zip_download_folder}/fut_disagg_txt_{y}.zip'
        if len(glob.glob(f'{zip_download_folder}/fut_disagg_txt_{y}.csv'))>0:
            print(f'Already downloaded fut_disagg_txt_{y}.csv')
            continue
        print(f'About to downloaded fut_disagg_txt_{y}.csv')


About to downloaded fut_disagg_txt_2010.csv
About to downloaded fut_disagg_txt_2011.csv
About to downloaded fut_disagg_txt_2012.csv
About to downloaded fut_disagg_txt_2013.csv
About to downloaded fut_disagg_txt_2014.csv
About to downloaded fut_disagg_txt_2015.csv
About to downloaded fut_disagg_txt_2016.csv
About to downloaded fut_disagg_txt_2017.csv
About to downloaded fut_disagg_txt_2018.csv
About to downloaded fut_disagg_txt_2019.csv


In [9]:
if CREATE_COT_DATA:
    last_year = datetime.datetime.now().year
    year_list = np.arange(2010,last_year+1)
    zip_download_folder = f'{TEMP_FOLDER}/zip'
    df_cot_temp = None
    df_cot = None
    for y in year_list:
        yint = int(y)
#         url = f"https://www.cftc.gov/files/dea/history/deacot{yint}.zip" 
        url = f'https://www.cftc.gov/files/dea/history/fut_disagg_txt_{yint}.zip'
        path_to_zip_file = f'{zip_download_folder}/fut_disagg_txt_{y}.zip'
        if len(glob.glob(f'{zip_download_folder}/fut_disagg_txt_{y}.csv'))>0:
            print(f'Already downloaded fut_disagg_txt_{y}.csv')
            continue
        if not os.path.isfile(path_to_zip_file) or y >= last_year:
            print(f'retrieving cot zip file from {url}')
            try:
                urllib.request.urlretrieve(url, path_to_zip_file)    
            except:
                import time
                time.sleep(1)
                urllib.request.urlretrieve(url, path_to_zip_file)    
        zip_ref = zipfile.ZipFile(path_to_zip_file, 'r')
        zip_ref.extractall(zip_download_folder)
        zip_ref.close()
#         df_cot_temp = pd.read_csv(f'{zip_download_folder}/Annual.TXT')
        df_cot_temp = pd.read_csv(f'{zip_download_folder}/f_year.txt')
        if df_cot is None:
            df_cot = df_cot_temp.copy()
        else:
            df_cot = df_cot.append(df_cot_temp,ignore_index=True)
            df_cot.index = list(range(len(df_cot)))
        print(f'processed cot csv file from {url}. Length = {len(df_cot_temp)}')
        
df_cot.head()


Columns (133,145,146,147,148,149,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2010.zip. Length = 5547



Columns (133,147,148,149,150,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2011.zip. Length = 5486



Columns (147,148,149,150,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2012.zip. Length = 5938



Columns (120,121,134,135,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2013.zip. Length = 8270



Columns (120,133,134,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2014.zip. Length = 10271
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2015.zip. Length = 9762



Columns (133,145,146,147,148,149,157,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2016.zip. Length = 10520



Columns (146,147,148,149,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2017.zip. Length = 10204



Columns (133,145,147,159,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2018.zip. Length = 10874
retrieving cot zip file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2019.zip



Columns (133,145,146,147,160) have mixed types. Specify dtype option on import or set low_memory=False.



processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2019.zip. Length = 6662


Unnamed: 0,As_of_Date_In_Form_YYMMDD,CFTC_Commodity_Code,CFTC_Commodity_Code_Quotes,CFTC_Contract_Market_Code,CFTC_Contract_Market_Code_Quotes,CFTC_Market_Code,CFTC_Market_Code_Quotes,CFTC_Region_Code,CFTC_SubGroup_Code,Change_in_M_Money_Long_All,...,Traders_Swap_Spread_Other,Traders_Tot_All,Traders_Tot_Old,Traders_Tot_Other,Traders_Tot_Rept_Long_All,Traders_Tot_Rept_Long_Old,Traders_Tot_Rept_Long_Other,Traders_Tot_Rept_Short_All,Traders_Tot_Rept_Short_Old,Traders_Tot_Rept_Short_Other
0,101228,1,1,1602,1602,CBT,CBT,0,A10,4819,...,21,413,393,272,310,251,162,297,243,217
1,101221,1,1,1602,1602,CBT,CBT,0,A10,-1932,...,21,406,383,274,302,238,160,299,244,217
2,101214,1,1,1602,1602,CBT,CBT,0,A10,3201,...,21,421,396,281,307,244,159,312,256,221
3,101207,1,1,1602,1602,CBT,CBT,0,A10,18531,...,21,415,397,274,308,242,157,298,256,215
4,101130,1,1,1602,1602,CBT,CBT,0,A10,-17,...,19,408,387,266,284,231,140,329,282,215


___
### Make column names easier to process, make main date field a datetime object, and sort the DataFrame
___

___
### Show important columns for a specific  commodity
___

In [10]:
if CREATE_COT_DATA:
    col_rename_dict = {c:c.strip().replace('__','_').replace(' ','_').replace('-','_').replace('(','').replace(')','') for c in df_cot.columns.values}
    df_cot2 = df_cot.rename(columns=col_rename_dict)
    df_cot2 = df_cot2.drop(columns=['Report_Date_as_MM_DD_YYYY'])
    df_cot2.Market_and_Exchange_Names = df_cot2.Market_and_Exchange_Names.str.strip()
    l = lambda s:datetime.datetime(2000+int(str(s)[0:2]),int(str(s)[2:4]),int(str(s)[4:6]))
    df_cot2.As_of_Date_In_Form_YYMMDD = df_cot2.As_of_Date_In_Form_YYMMDD.apply(l)
    df_cot2 = df_cot2.sort_values(['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD'])
    df_cot2.to_csv(cot_save_path,index=False)
    
    

In [11]:
df_cot2 = pd.read_csv(cot_save_path)
df_cot2.As_of_Date_In_Form_YYMMDD = df_cot2.As_of_Date_In_Form_YYMMDD.apply(str_to_date)
cot_beg_date = datetime.datetime.now() - datetime.timedelta(2000)
# df_commod = df_cot2[df_cot2.Market_and_Exchange_Names.str.contains(commod)][df_cot2.As_of_Date_In_Form_YYMMDD>=cot_beg_date]
df_commod = df_cot2[df_cot2.As_of_Date_In_Form_YYMMDD>=cot_beg_date]
df_commod_basic = df_commod[basic_cols]
df_commod_long = df_commod[long_cols]
df_commod_short = df_commod[short_cols]



Columns (151,154,157,160,163,166,167,168,169,170,171,172,175,178,181,183,184,186,187,189,190) have mixed types. Specify dtype option on import or set low_memory=False.



### Show basic open interest info

In [12]:
commod = 'CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE'
[c for c in df_commod.Market_and_Exchange_Names.values if 'CRUDE' in c]

['BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK ME

In [13]:
df_commod_basic[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()



Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All
13595,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-13,2059135
13594,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-06,2070210
13593,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-30,2069072
13592,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-23,2056492
13591,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-16,2089629


### Show important "long" position info

In [14]:
df_commod_long[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
13595,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-13,2059135,403077,138885,277969,269071,72466,1986669
13594,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-06,2070210,443673,133538,264936,275988,76159,1994051
13593,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-30,2069072,435611,140203,259812,280426,78401,1990671
13592,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-23,2056492,416733,139146,255681,282259,77239,1979253
13591,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-16,2089629,423330,138174,268607,276877,84384,2005245


### Show important "short" position info

In [15]:
df_commod_short[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Short_All,Swap_Positions_Short_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,NonRept_Positions_Short_All,Tot_Rept_Positions_Short_All
13595,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-13,2059135,382808,530688,67580,97316,83076,1976059
13594,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-06,2070210,410221,541079,86287,78996,77711,1992499
13593,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-30,2069072,413189,549692,79192,73755,78625,1990447
13592,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-23,2056492,390065,563302,76957,63132,77602,1978890
13591,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-16,2089629,408288,584762,48964,72758,76600,2013029


### CREATE important "net" position info for other anlysis notebooks

In [16]:
df_commod_long.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
15,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-02-25,300016,87546,44030,58790,14275,47082,252934
16,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-04,302763,89917,46543,57588,13516,45636,257127
17,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-11,285806,79033,47219,47479,13864,46424,239382
18,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-18,281962,82124,50169,39404,14077,40921,241041
19,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-25,271061,72126,49577,39672,14072,44170,226891


In [17]:
df_commod_long.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
15,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-02-25,300016,87546,44030,58790,14275,47082,252934
16,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-04,302763,89917,46543,57588,13516,45636,257127
17,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-11,285806,79033,47219,47479,13864,46424,239382
18,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-18,281962,82124,50169,39404,14077,40921,241041
19,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-25,271061,72126,49577,39672,14072,44170,226891


In [18]:
df_commod_short.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Short_All,Swap_Positions_Short_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,NonRept_Positions_Short_All,Tot_Rept_Positions_Short_All
15,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-02-25,300016,162074,6443,13097,44632,25477,274539
16,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-04,302763,166302,6480,15311,42220,22887,279876
17,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-11,285806,145135,5836,14232,42518,26298,259508
18,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-18,281962,136435,4191,16532,41585,27952,254010
19,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-03-25,271061,127610,4755,15963,41547,29742,241319


In [19]:
df_commod_net = df_commod_long.merge(df_commod_short,how='inner',on=basic_cols)
print(len(df_commod_net),len(df_commod_long),len(df_commod_short))


56932 56932 56932


In [20]:
df_commod_net = df_commod_long.merge(df_commod_short,how='inner',on=basic_cols)
print(len(df_commod_net),len(df_commod_long),len(df_commod_short))

net_cols = [c1 for c1 in long_cols + short_cols if c1 not in basic_cols]
for c2 in net_cols:
    df_commod_net = df_commod_net[df_commod_net[c2].astype(str).str.contains('[0-9]')]
print(len(df_commod_net))


for p in summary_cols_dict.keys():
    t = summary_cols_dict[p]
    lc = t[0]
    sc = t[1]
    df_commod_net[p+'_net'] = df_commod_net[lc].astype(float) - df_commod_net[sc].astype(float)
    df_commod_net[p+'_ratio'] = df_commod_net[lc].astype(float) / df_commod_net[sc].astype(float)
print(len(df_commod_net))

sort_cols = ['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD']
df_commod_net = df_commod_net.sort_values(sort_cols)


56932 56932 56932
56932
56932


In [21]:
ratio_cols = ['prod_ratio','monman_ratio','swap_ratio','other_ratio','nonrep_ratio']
df_commod_net[df_commod_net.Market_and_Exchange_Names.str.contains(commod)][sort_cols + ratio_cols].sort_values(sort_cols,ascending=False).head()



Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,prod_ratio,monman_ratio,swap_ratio,other_ratio,nonrep_ratio
9107,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-13,1.052948,4.113184,0.261707,2.76492,0.872286
9106,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-08-06,1.081546,3.070405,0.246799,3.493696,0.980029
9105,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-30,1.054266,3.280786,0.255057,3.802129,0.997151
9104,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-23,1.068368,3.322388,0.247018,4.470934,0.995322
9103,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2019-07-16,1.036842,5.485806,0.236291,3.805451,1.101619


In [22]:
nan_cols = df_commod_net.columns[df_commod_net.isna().any()].tolist()
if len(nan_cols)>0:
    print(nan_cols)
    df_commod_net = df_commod_net.fillna(0)
    print(df_commod_net[df_commod_net.monman_ratio.isnull()][ratio_cols])

['swap_ratio', 'monman_ratio', 'other_ratio', 'nonrep_ratio']
Empty DataFrame
Columns: [prod_ratio, monman_ratio, swap_ratio, other_ratio, nonrep_ratio]
Index: []


In [23]:
df_commod_net.columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [24]:
print(f'saving {len(df_commod_net)} records')
df_commod_net.to_csv(cot_net_save_path,index=False)

saving 56932 records


## END

In [25]:
len(df_commod_net[df_commod_net.Market_and_Exchange_Names.str.contains('SILVER - COMMODITY EXCHANGE INC.')])

286

In [26]:
df_cot2[df_cot2.Market_and_Exchange_Names.str.lower().str.contains('10')].Market_and_Exchange_Names.unique()


array(['1000 CALIFORNIA CARBON ALLOWANCE - ICE FUTURES ENERGY DIV',
       'HENRY HUB NAT GAS FINL-10000 - NASDAQ FUTURES',
       'HHUB NAT GAS PENULT FINL-10000 - NASDAQ FUTURES',
       'NFX SM10TC SUPRAMAX 10T/C AVG - NASDAQ FUTURES'], dtype=object)

In [27]:
list(filter(lambda s:'As_of_Date_In_Form_YYMMDD' in s,df_cot2.columns.values))

['As_of_Date_In_Form_YYMMDD']

In [28]:
df_wheat = df_cot2[df_cot2.Market_and_Exchange_Names.str.contains('WHEAT-SRW - CHICAGO BOARD OF TRADE')]
len(df_wheat)
# np.sort(df_wheat.As_of_Date_In_Form_YYMMDD.unique())

296

In [29]:
df_cot2[df_cot2.Market_and_Exchange_Names.str.contains('WHEAT-HRW - CHICAGO BOARD OF TRADE')].As_of_Date_In_Form_YYMMDD.max()


Timestamp('2019-08-13 00:00:00')