## Build history of commodity COTs

1. Retrieve data from the CFTC website (www.cft.gov/files);
2. Extract data from the most important Commercial and Non Commercial long and short columns;
3. Create "net" columns for each important category;
4. Merge this data with the ETF history data created in the previous steps


In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import os, sys,glob
import datetime
%matplotlib inline
import matplotlib.pyplot as plt
# import plotly.plotly as py
import plotly.graph_objs as go
import zipfile
import urllib.request
from PIL import Image
def str_to_date(d):
    try:
        dt = datetime.datetime.strptime(str(d),'%Y-%m-%d')
    except:
        return None
    return dt

# Make important folders
TEMP_FOLDER = './temp_folder'
try:
    os.mkdir(TEMP_FOLDER)
except:
    pass
try:
    os.mkdir(f'{TEMP_FOLDER}/cot')
except:
    pass
try:
    os.mkdir(f'{TEMP_FOLDER}/zip')
except:
    pass


___
## First, decide if you want to re-create the ETF and COT data, or just retrieve the previously saved data DataFrames.

In [3]:
CREATE_COT_DATA = True
cot_save_path = './cot_new_history.csv'
cot_net_save_path = './cot_net_new_history.csv'
YEAR_OFFSET = 0 if datetime.datetime.now() > datetime.datetime(2020,1,10) else 1

In [4]:
YEAR_OFFSET

0

### Main column names

In [5]:
basic_cols = ['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD','Open_Interest_All']
long_cols = basic_cols + ['Prod_Merc_Positions_Long_All','Swap_Positions_Long_All',
            'M_Money_Positions_Long_All','Other_Rept_Positions_Long_All',
            'NonRept_Positions_Long_All','Tot_Rept_Positions_Long_All']
short_cols = basic_cols + ['Prod_Merc_Positions_Short_All','Swap_Positions_Short_All',
            'M_Money_Positions_Short_All','Other_Rept_Positions_Short_All',
            'NonRept_Positions_Short_All','Tot_Rept_Positions_Short_All']
summary_types = ['prod','swap','monman','other','nonrep','totrep']
summary_cols_dict = {summary_types[i]:(long_cols[i+len(basic_cols)],short_cols[i+len(basic_cols)]) for i in range(len(summary_types))}


In [6]:
summary_cols_dict

{'prod': ('Prod_Merc_Positions_Long_All', 'Prod_Merc_Positions_Short_All'),
 'swap': ('Swap_Positions_Long_All', 'Swap_Positions_Short_All'),
 'monman': ('M_Money_Positions_Long_All', 'M_Money_Positions_Short_All'),
 'other': ('Other_Rept_Positions_Long_All', 'Other_Rept_Positions_Short_All'),
 'nonrep': ('NonRept_Positions_Long_All', 'NonRept_Positions_Short_All'),
 'totrep': ('Tot_Rept_Positions_Long_All', 'Tot_Rept_Positions_Short_All')}

___
## Process CFTC COT Data
___

### Initial processing
1. Download zip files from www.cft.gov/files;
2. Unip the files using the zipfile package;
3. Read each csv (usually named Annual.TXT), and merge them into the df_cot DataFrame.


In [7]:
zip_download_folder = f'{TEMP_FOLDER}/zip'

In [8]:
f'{zip_download_folder}/*.csv'

'./temp_folder/zip/*.csv'

In [9]:
glob.glob(f'{zip_download_folder}/*')

['./temp_folder/zip/Annual.TXT',
 './temp_folder/zip/annual_2007.txt',
 './temp_folder/zip/dea_fut_xls_2000.0.zip',
 './temp_folder/zip/dea_fut_xls_2001.0.zip',
 './temp_folder/zip/dea_fut_xls_2002.0.zip',
 './temp_folder/zip/dea_fut_xls_2003.0.zip',
 './temp_folder/zip/dea_fut_xls_2004.0.zip',
 './temp_folder/zip/dea_fut_xls_2005.0.zip',
 './temp_folder/zip/dea_fut_xls_2006.0.zip',
 './temp_folder/zip/dea_fut_xls_2007.0.zip',
 './temp_folder/zip/dea_fut_xls_2008.0.zip',
 './temp_folder/zip/dea_fut_xls_2009.0.zip',
 './temp_folder/zip/dea_fut_xls_2010.0.zip',
 './temp_folder/zip/dea_fut_xls_2011.0.zip',
 './temp_folder/zip/dea_fut_xls_2012.0.zip',
 './temp_folder/zip/dea_fut_xls_2013.0.zip',
 './temp_folder/zip/dea_fut_xls_2014.0.zip',
 './temp_folder/zip/dea_fut_xls_2015.0.zip',
 './temp_folder/zip/dea_fut_xls_2016.0.zip',
 './temp_folder/zip/dea_fut_xls_2017.0.zip',
 './temp_folder/zip/dea_fut_xls_2018.0.zip',
 './temp_folder/zip/dea_fut_xls_2019.0.zip',
 './temp_folder/zip/dea_fut_x

In [10]:
if CREATE_COT_DATA:
    last_year = datetime.datetime.now().year
    year_list = np.arange(2010,last_year+1)
    zip_download_folder = f'{TEMP_FOLDER}/zip'
    df_cot_temp = None
    df_cot = None
    for y in year_list:
        yint = int(y)
#         url = f"https://www.cftc.gov/files/dea/history/deacot{yint}.zip" 
        url = f'https://www.cftc.gov/files/dea/history/fut_disagg_txt_{yint}.zip'
        path_to_zip_file = f'{zip_download_folder}/fut_disagg_txt_{y}.zip'
        if len(glob.glob(f'{zip_download_folder}/fut_disagg_txt_{y}.csv'))>0:
            print(f'Already downloaded fut_disagg_txt_{y}.csv')
            continue
        print(f'About to downloaded fut_disagg_txt_{y}.csv')


About to downloaded fut_disagg_txt_2010.csv
About to downloaded fut_disagg_txt_2011.csv
About to downloaded fut_disagg_txt_2012.csv
About to downloaded fut_disagg_txt_2013.csv
About to downloaded fut_disagg_txt_2014.csv
About to downloaded fut_disagg_txt_2015.csv
About to downloaded fut_disagg_txt_2016.csv
About to downloaded fut_disagg_txt_2017.csv
About to downloaded fut_disagg_txt_2018.csv
About to downloaded fut_disagg_txt_2019.csv
About to downloaded fut_disagg_txt_2020.csv


In [11]:
if CREATE_COT_DATA:
    last_year = datetime.datetime.now().year - YEAR_OFFSET
    year_list = np.arange(2010,last_year+1)
    zip_download_folder = f'{TEMP_FOLDER}/zip'
    df_cot_temp = None
    df_cot = None
    for y in year_list:
        yint = int(y)
#         url = f"https://www.cftc.gov/files/dea/history/deacot{yint}.zip" 
        url = f'https://www.cftc.gov/files/dea/history/fut_disagg_txt_{yint}.zip'
        path_to_zip_file = f'{zip_download_folder}/fut_disagg_txt_{y}.zip'
        if len(glob.glob(f'{zip_download_folder}/fut_disagg_txt_{y}.csv'))>0:
            print(f'Already downloaded fut_disagg_txt_{y}.csv')
            continue
        if not os.path.isfile(path_to_zip_file) or y >= last_year:
            print(f'retrieving cot zip file from {url}')
            try:
                urllib.request.urlretrieve(url, path_to_zip_file)    
            except:
                import time
                time.sleep(1)
                urllib.request.urlretrieve(url, path_to_zip_file)    
        zip_ref = zipfile.ZipFile(path_to_zip_file, 'r')
        zip_ref.extractall(zip_download_folder)
        zip_ref.close()
#         df_cot_temp = pd.read_csv(f'{zip_download_folder}/Annual.TXT')
        df_cot_temp = pd.read_csv(f'{zip_download_folder}/f_year.txt')
        if df_cot is None:
            df_cot = df_cot_temp.copy()
        else:
            df_cot = df_cot.append(df_cot_temp,ignore_index=True)
            df_cot.index = list(range(len(df_cot)))
        print(f'processed cot csv file from {url}. Length = {len(df_cot_temp)}')
        
df_cot.head()

processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2010.zip. Length = 5547
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2011.zip. Length = 5486
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2012.zip. Length = 5938
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2013.zip. Length = 8270
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2014.zip. Length = 10271
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2015.zip. Length = 9762
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2016.zip. Length = 10520
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2017.zip. Length = 10204
processed cot csv file from https://www.cftc.gov/files/dea/history/fut_disagg_txt_2018.zip. Length = 10874
processed cot csv file from https://www.cf

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Report_Date_as_MM_DD_YYYY,CFTC_Contract_Market_Code,CFTC_Market_Code,CFTC_Region_Code,CFTC_Commodity_Code,Open_Interest_All,Prod_Merc_Positions_Long_All,Prod_Merc_Positions_Short_All,...,Conc_Net_LE_4_TDR_Short_Other,Conc_Net_LE_8_TDR_Long_Other,Conc_Net_LE_8_TDR_Short_Other,Contract_Units,CFTC_Contract_Market_Code_Quotes,CFTC_Market_Code_Quotes,CFTC_Commodity_Code_Quotes,CFTC_SubGroup_Code,FutOnly_or_Combined,Report_Date_as_YYYY-MM-DD
0,WHEAT - CHICAGO BOARD OF TRADE,101228,2010-12-28,1602,CBT,0,1,488334,49996,245335,...,16.4,41.3,22.5,"(CONTRACTS OF 5,000 BUSHELS)",1602,CBT,1,A10,FutOnly,
1,WHEAT - CHICAGO BOARD OF TRADE,101221,2010-12-21,1602,CBT,0,1,480241,51194,237590,...,16.4,41.3,22.7,"(CONTRACTS OF 5,000 BUSHELS)",1602,CBT,1,A10,FutOnly,
2,WHEAT - CHICAGO BOARD OF TRADE,101214,2010-12-14,1602,CBT,0,1,478364,52201,235461,...,16.5,41.2,23.0,"(CONTRACTS OF 5,000 BUSHELS)",1602,CBT,1,A10,FutOnly,
3,WHEAT - CHICAGO BOARD OF TRADE,101207,2010-12-07,1602,CBT,0,1,479187,53882,229541,...,16.4,42.3,22.7,"(CONTRACTS OF 5,000 BUSHELS)",1602,CBT,1,A10,FutOnly,
4,WHEAT - CHICAGO BOARD OF TRADE,101130,2010-11-30,1602,CBT,0,1,459421,60038,198370,...,16.6,46.5,22.7,"(CONTRACTS OF 5,000 BUSHELS)",1602,CBT,1,A10,FutOnly,


___
### Make column names easier to process, make main date field a datetime object, and sort the DataFrame
___

___
### Show important columns for a specific  commodity
___

In [12]:
if CREATE_COT_DATA:
    col_rename_dict = {c:c.strip().replace('__','_').replace(' ','_').replace('-','_').replace('(','').replace(')','') for c in df_cot.columns.values}
    df_cot2 = df_cot.rename(columns=col_rename_dict)
    df_cot2 = df_cot2.drop(columns=['Report_Date_as_MM_DD_YYYY'])
    df_cot2.Market_and_Exchange_Names = df_cot2.Market_and_Exchange_Names.str.strip()
    l = lambda s:datetime.datetime(2000+int(str(s)[0:2]),int(str(s)[2:4]),int(str(s)[4:6]))
    df_cot2.As_of_Date_In_Form_YYMMDD = df_cot2.As_of_Date_In_Form_YYMMDD.apply(l)
    df_cot2 = df_cot2.sort_values(['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD'])
    df_cot2.to_csv(cot_save_path,index=False)
    
    

In [13]:
df_cot2 = pd.read_csv(cot_save_path)
df_cot2.As_of_Date_In_Form_YYMMDD = df_cot2.As_of_Date_In_Form_YYMMDD.apply(str_to_date)
cot_beg_date = datetime.datetime.now() - datetime.timedelta(2000)
# df_commod = df_cot2[df_cot2.Market_and_Exchange_Names.str.contains(commod)][df_cot2.As_of_Date_In_Form_YYMMDD>=cot_beg_date]
df_commod = df_cot2[df_cot2.As_of_Date_In_Form_YYMMDD>=cot_beg_date]
df_commod_basic = df_commod[basic_cols]
df_commod_long = df_commod[long_cols]
df_commod_short = df_commod[short_cols]


### Show basic open interest info

In [14]:
commod = 'CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE EXCHANGE'
[c for c in df_commod.Market_and_Exchange_Names.values if 'CRUDE' in c]

['BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK MERCANTILE EXCHANGE',
 'BRENT CRUDE OIL LAST DAY - NEW YORK ME

In [15]:
df_commod_basic[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()


Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All
15107,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-12,2248020
15106,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-05,2243871
15105,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-28,2261202
15104,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-21,2276638
15103,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-14,2353955


### Show important "long" position info

In [16]:
df_commod_long[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
15107,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-12,2248020,491543,134875,405071,304486,102997,2145023
15106,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-05,2243871,464901,136434,392191,330721,95769,2148102
15105,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-28,2261202,463745,133110,384326,353452,102192,2159010
15104,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-21,2276638,468562,146426,324667,411581,100919,2175719
15103,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-14,2353955,488323,138641,295870,404604,129134,2224821


### Show important "short" position info

In [17]:
df_commod_short[df_cot2.Market_and_Exchange_Names.str.contains(commod)].sort_values('As_of_Date_In_Form_YYMMDD',ascending=False).head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Short_All,Swap_Positions_Short_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,NonRept_Positions_Short_All,Tot_Rept_Positions_Short_All
15107,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-12,2248020,600425,583964,53386,115152,86045,2161975
15106,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-05,2243871,557148,581085,63590,128710,89483,2154388
15105,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-28,2261202,539452,647688,65960,82430,101295,2159907
15104,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-21,2276638,517090,690480,77189,71879,95517,2181121
15103,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-14,2353955,519210,662583,100515,89290,84974,2268981


### CREATE important "net" position info for other anlysis notebooks

In [18]:
df_commod_long.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
54,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-11-25,372718,114718,72066,29207,25529,39517,333201
55,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-02,359634,107472,72809,30922,21658,39754,319880
56,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-09,361674,114748,71697,32045,22474,38045,323629
57,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-16,357248,108872,69940,35613,23599,37283,319965
58,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-23,354472,106433,71551,33041,25360,37338,317134


In [19]:
df_commod_long.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Long_All,Swap_Positions_Long_All,M_Money_Positions_Long_All,Other_Rept_Positions_Long_All,NonRept_Positions_Long_All,Tot_Rept_Positions_Long_All
54,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-11-25,372718,114718,72066,29207,25529,39517,333201
55,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-02,359634,107472,72809,30922,21658,39754,319880
56,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-09,361674,114748,71697,32045,22474,38045,323629
57,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-16,357248,108872,69940,35613,23599,37283,319965
58,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-23,354472,106433,71551,33041,25360,37338,317134


In [20]:
df_commod_short.head()

Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,Open_Interest_All,Prod_Merc_Positions_Short_All,Swap_Positions_Short_All,M_Money_Positions_Short_All,Other_Rept_Positions_Short_All,NonRept_Positions_Short_All,Tot_Rept_Positions_Short_All
54,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-11-25,372718,160709,4681,50668,25999,38980,333738
55,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-02,359634,150141,3772,49407,31742,37553,322081
56,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-09,361674,154490,4367,52538,30594,37020,324654
57,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-16,357248,143482,3240,57078,34865,36642,320606
58,"#2 HEATING OIL, NY HARBOR-ULSD - NEW YORK MERC...",2014-12-23,354472,149095,900,55095,32279,36354,318118


In [21]:
df_commod_net = df_commod_long.merge(df_commod_short,how='inner',on=basic_cols)
print(len(df_commod_net),len(df_commod_long),len(df_commod_short))


57215 57215 57215


In [22]:
df_commod_net = df_commod_long.merge(df_commod_short,how='inner',on=basic_cols)
print(len(df_commod_net),len(df_commod_long),len(df_commod_short))

net_cols = [c1 for c1 in long_cols + short_cols if c1 not in basic_cols]
for c2 in net_cols:
    df_commod_net = df_commod_net[df_commod_net[c2].astype(str).str.contains('[0-9]')]
print(len(df_commod_net))


for p in summary_cols_dict.keys():
    t = summary_cols_dict[p]
    lc = t[0]
    sc = t[1]
    df_commod_net[p+'_net'] = df_commod_net[lc].astype(float) - df_commod_net[sc].astype(float)
    df_commod_net[p+'_ratio'] = df_commod_net[lc].astype(float) / df_commod_net[sc].astype(float)
print(len(df_commod_net))

sort_cols = ['Market_and_Exchange_Names','As_of_Date_In_Form_YYMMDD']
df_commod_net = df_commod_net.sort_values(sort_cols)


57215 57215 57215
57215
57215


In [23]:
ratio_cols = ['prod_ratio','monman_ratio','swap_ratio','other_ratio','nonrep_ratio']
df_commod_net[df_commod_net.Market_and_Exchange_Names.str.contains(commod)][sort_cols + ratio_cols].sort_values(sort_cols,ascending=False).head()



Unnamed: 0,Market_and_Exchange_Names,As_of_Date_In_Form_YYMMDD,prod_ratio,monman_ratio,swap_ratio,other_ratio,nonrep_ratio
9414,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-12,0.818658,7.587589,0.230965,2.644209,1.197013
9413,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-05-05,0.83443,6.167495,0.234792,2.569505,1.070248
9412,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-28,0.859659,5.826653,0.205516,4.287905,1.008855
9411,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-21,0.906152,4.20613,0.212064,5.726026,1.056555
9410,"CRUDE OIL, LIGHT SWEET - NEW YORK MERCANTILE E...",2020-04-14,0.940512,2.943541,0.209243,4.531347,1.519688


In [24]:
nan_cols = df_commod_net.columns[df_commod_net.isna().any()].tolist()
if len(nan_cols)>0:
    print(nan_cols)
    df_commod_net = df_commod_net.fillna(0)
    print(df_commod_net[df_commod_net.monman_ratio.isnull()][ratio_cols])

['swap_ratio', 'monman_ratio', 'other_ratio', 'nonrep_ratio']
Empty DataFrame
Columns: [prod_ratio, monman_ratio, swap_ratio, other_ratio, nonrep_ratio]
Index: []


In [25]:
df_commod_net.columns.values

array(['Market_and_Exchange_Names', 'As_of_Date_In_Form_YYMMDD',
       'Open_Interest_All', 'Prod_Merc_Positions_Long_All',
       'Swap_Positions_Long_All', 'M_Money_Positions_Long_All',
       'Other_Rept_Positions_Long_All', 'NonRept_Positions_Long_All',
       'Tot_Rept_Positions_Long_All', 'Prod_Merc_Positions_Short_All',
       'Swap_Positions_Short_All', 'M_Money_Positions_Short_All',
       'Other_Rept_Positions_Short_All', 'NonRept_Positions_Short_All',
       'Tot_Rept_Positions_Short_All', 'prod_net', 'prod_ratio',
       'swap_net', 'swap_ratio', 'monman_net', 'monman_ratio',
       'other_net', 'other_ratio', 'nonrep_net', 'nonrep_ratio',
       'totrep_net', 'totrep_ratio'], dtype=object)

In [26]:
print(f'saving {len(df_commod_net)} records')
df_commod_net.to_csv(cot_net_save_path,index=False)

saving 57215 records


## END

In [27]:
len(df_commod_net[df_commod_net.Market_and_Exchange_Names.str.contains('SILVER - COMMODITY EXCHANGE INC.')])

285

In [28]:
df_cot2[df_cot2.Market_and_Exchange_Names.str.lower().str.contains('10')].Market_and_Exchange_Names.unique()


array(['1000 CALIFORNIA CARBON ALLOWANCE - ICE FUTURES ENERGY DIV',
       'HENRY HUB NAT GAS FINL-10000 - NASDAQ FUTURES',
       'HHUB NAT GAS PENULT FINL-10000 - NASDAQ FUTURES',
       'NFX SM10TC SUPRAMAX 10T/C AVG - NASDAQ FUTURES'], dtype=object)

In [29]:
list(filter(lambda s:'As_of_Date_In_Form_YYMMDD' in s,df_cot2.columns.values))

['As_of_Date_In_Form_YYMMDD']

In [30]:
df_wheat = df_cot2[df_cot2.Market_and_Exchange_Names.str.contains('WHEAT-SRW - CHICAGO BOARD OF TRADE')]
len(df_wheat)
# np.sort(df_wheat.As_of_Date_In_Form_YYMMDD.unique())

334

In [31]:
df_cot2[df_cot2.Market_and_Exchange_Names.str.contains('WHEAT-HRW - CHICAGO BOARD OF TRADE')].As_of_Date_In_Form_YYMMDD.max()


Timestamp('2020-05-12 00:00:00')