# options_table_loader.ipynb
### Write options daily settlement files for a range of years and months to Postgres 

#### Overview:
Options and futures daily settlement data is written to a Postgres database with the following characteristics:

    
|  Type  |      Name     | 
|--------|---------------|
|Database|   *sec_db*    |
| Schema | *sec_schema*  |
| Table  |*options_table*|
| Table  |*futures_table*|


#### Usage:
1. Set commodities to update in sec_db database
```
CONTRACT_LIST = ['CL','CB','ES']
```
2. To update the sec_db database for a single month, set `ONLY_DO_SINGLE_MONTH` to `True` and set `SINGLE_YYYYMM` to a value like `201909` in the second code cell below.  Also, set `WRITE_TO_POSTGRES` to `True`.
```
SINGLE_YYYYMM = 201909
WRITE_TO_POSTGRES = True
```
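3. To process multiple months, set `ONLY_DO_SINGLE_MONTH` to `False` and set `BEGIN_YEAR` and `END_YEAR` to values like `2011` and `2019` in the second code cell below.  Every month of each year from `BEGIN_YEAR` up to, but not including, `END_YEAR` is processed.  Also, set `WRITE_TO_POSTGRES` to `False`; the combined CSV file is then written but not loaded into Postgres.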
```
BEGIN_YEAR = 2011
END_YEAR = 2019
WRITE_TO_POSTGRES = False
```


In [None]:
import zipfile
import glob
import pandas as pd
import numpy as np

from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
import sys
import os
if  not './' in sys.path:
    sys.path.append('./')
if  not '../' in sys.path:
    sys.path.append('../')

from barchartacs import build_db
from barchartacs import db_info
import datetime
import io
from tqdm import tqdm,tqdm_notebook
from barchartacs import pg_pandas as pg
import importlib
importlib.reload(db_info)



# Database and target-table settings shared by the cells below.
# OS user handed to `sudo -u` when psql_copy() runs psql; None runs psql as the current user.
DB_USER_NAME = None
# Name of the Postgres database described in the overview.
DB_NAME = 'sec_db'
SCHEMA_NAME = 'sec_schema'
UNDERLYING_TABLE_NAME = 'options_table'
# Schema-qualified table name used as the COPY target in psql_copy().
FULL_TABLE_NAME = f'{SCHEMA_NAME}.{UNDERLYING_TABLE_NAME}'
# Absolute path of the temporary CSV file that psql_copy() loads into the table.
CSV_TEMP_PATH = os.path.abspath('./temp_folder/df_all_temp.csv')


In [None]:
# Run-mode switches and parameters; edit these before running the cells below.
# True: only SINGLE_YYYYMM is processed.  False: every month of the year range is processed.
ONLY_DO_SINGLE_MONTH = True
# True: the write cell calls psql_copy() to load the CSV.  False: that cell does nothing.
WRITE_TO_POSTGRES = True
# Commodity codes passed to BuildDb as contract_list.
CONTRACT_LIST = ['CL','CB','ES']#['NG']
# Per-commodity values passed to BuildDb as strike_divisor_dict.
STRIKE_DIVISOR_DICT = {'GE':100,'NG':100,'CL':10,'CB':10}
# NOTE(review): not referenced by any of the visible cells in this notebook.
STRIKE_DIVISOR = None
SINGLE_YYYYMM = 201909
# The multi-year loop iterates np.arange(BEGIN_YEAR, END_YEAR), so END_YEAR itself is excluded.
BEGIN_YEAR = 2011
END_YEAR = 2020
# Path the data cells write the CSV to.  psql_copy() reads CSV_TEMP_PATH, not this
# variable, so changing only this value makes the COPY load a different file.
csv_temp_path = CSV_TEMP_PATH #'./temp_folder/df_all_temp.csv'

In [None]:
def psql_copy():
    """
    Load the temporary CSV file into the options table using psql's client-side copy.

    Builds a ``\\COPY <FULL_TABLE_NAME> FROM '<CSV_TEMP_PATH>' ...`` meta-command and
    hands it to ``psql -d <DB_NAME> -c``.  When DB_USER_NAME is set, psql is run
    through ``sudo -u <DB_USER_NAME>``.

    If WRITE_TO_POSTGRES is False nothing is executed; the command that would have
    been run is printed instead.  When the command is executed, psql's output and a
    non-zero exit status are printed so a failed load is visible in the notebook.
    """
    import shlex
    import subprocess

    # Escaped backslash: a bare "\C" is an invalid escape sequence in a normal string.
    copy_cmd = f"\\COPY {FULL_TABLE_NAME} FROM '{CSV_TEMP_PATH}' DELIMITER ',' CSV HEADER;"

    # Both variants target DB_NAME, the database that holds FULL_TABLE_NAME.
    # An argument list avoids nesting the COPY text inside shell quotes.
    psql_args = ['psql', '-d', DB_NAME, '-c', copy_cmd]
    if DB_USER_NAME is not None:
        psql_args = ['sudo', '-u', DB_USER_NAME] + psql_args

    if not WRITE_TO_POSTGRES:  # dry run: show the command only
        print(' '.join(shlex.quote(arg) for arg in psql_args))
        return

    result = subprocess.run(psql_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    if result.stdout:
        print(result.stdout, end='')
    if result.stderr:
        print(result.stderr, end='')
    if result.returncode != 0:
        print(f'psql exited with status {result.returncode}')
 

### Execute a range of years

In [None]:
if not ONLY_DO_SINGLE_MONTH:
    # The parent folder of the zip files is stored in a small text file.
    with open('./temp_folder/zip_folder_parent.txt','r') as parent_file:
        # strip() keeps a trailing newline in that file out of the folder path
        zip_folder_parent = parent_file.read().strip() + "/options"
    monthly_frames = []  # one DataFrame per month that was built successfully
    df_all = None

    # np.arange excludes its stop value, so END_YEAR itself is not processed.
    for yyyy in tqdm_notebook(np.arange(BEGIN_YEAR,END_YEAR)):
        print(f'executing year {yyyy} at {datetime.datetime.now()}')
        for mm in tqdm_notebook(np.arange(1,13)):
            yyyymm = yyyy*100 + mm
            print(yyyymm)
            bdb = build_db.BuildDb(zip_folder_parent,yyyymm,strike_divisor_dict=STRIKE_DIVISOR_DICT,
                                   contract_list=CONTRACT_LIST,write_to_database=False)
            # A month that fails is logged and skipped so the remaining months still run.
            try:
                df_temp = bdb.execute()
            except Exception as e:
                bdb.logger.warn(f'ERROR MAIN LOOP: {str(e)}')
                continue
            if df_temp is None:
                bdb.logger.warn(f'ERROR MAIN LOOP: no data returned for {yyyymm}')
                continue
            monthly_frames.append(df_temp)

    if len(monthly_frames) == 0:
        # Fail loudly instead of leaving a stale CSV for the COPY step to load.
        raise RuntimeError(
            f'no monthly data could be built for {BEGIN_YEAR}..{END_YEAR - 1}; '
            f'{csv_temp_path} was not written')

    # Concatenate once at the end; growing a DataFrame inside the loop is quadratic.
    df_all = pd.concat(monthly_frames, ignore_index=True)

    # write all data to a csv file, that will be used in the postgres COPY command
    df_all.to_csv(csv_temp_path,index=False)


# ONLY Execute this cell if you want a specific yyyymm

In [None]:
if ONLY_DO_SINGLE_MONTH:
    yyyymm = SINGLE_YYYYMM
    # The parent folder of the zip files is stored in a small text file.
    with open('./temp_folder/zip_folder_parent.txt','r') as parent_file:
        # strip() keeps a trailing newline in that file out of the folder path
        zip_folder_parent = parent_file.read().strip() + "/options"
    df_single = None

    bdb = build_db.BuildDb(zip_folder_parent,yyyymm,strike_divisor_dict=STRIKE_DIVISOR_DICT,
                           contract_list=CONTRACT_LIST,write_to_database=False)
    try:
        df_temp = bdb.execute()
        df_single = df_temp.copy()
    except Exception as e:
        bdb.logger.warn(f'ERROR MAIN LOOP: {str(e)}')
        # Without data there is nothing to write; stop here rather than failing
        # later on a None DataFrame or loading a stale CSV.
        raise RuntimeError(
            f'could not build data for {yyyymm}; {csv_temp_path} was not written') from e

    # write this month's data to the csv file that will be used in the postgres COPY command
    df_single.to_csv(csv_temp_path,index=False)


In [None]:
if WRITE_TO_POSTGRES:
    # psql_copy() loads the CSV at CSV_TEMP_PATH into FULL_TABLE_NAME through psql.
    bdb.logger.info(f'MAIN LOOP: writing data from {CSV_TEMP_PATH} to {FULL_TABLE_NAME}')
    psql_copy()

In [None]:
def get_data_by_contract(pga,contract,
            month_code_clause=None,
             strike_clause=None,
             pc_clause=None,
             settle_date_clause=None,
             limit=None):
    """
    Query sec_schema.options_table for rows of one contract, with optional filters.

    Each ``*_clause`` argument is a raw SQL fragment made of an operator and a value
    (for example ``"= 'Z'"`` or ``">= 20190101"``) that is appended to the column it
    filters.  The fragments and ``contract`` are interpolated into the SQL text
    without escaping, so only pass trusted values.

    Parameters
    ----------
    pga : object providing ``get_sql(sql)``
    contract : str, compared with the first two characters of ``symbol``
    month_code_clause : str or None, filter on the third character of ``symbol``
    strike_clause : str or None, filter on ``strike``
    pc_clause : str or None, filter on ``pc``
    settle_date_clause : str or None, filter on ``settle_date``
    limit : int or None, maximum number of rows

    Returns
    -------
    Whatever ``pga.get_sql`` returns for the generated statement.
    """
    opttab = 'sec_schema.options_table'
    conditions = [f"substring(symbol,1,2) = '{contract}'"]
    # (column expression, caller-supplied fragment) pairs; None means "no filter"
    optional_filters = [
        ('substring(symbol,3,1)', month_code_clause),
        ('strike', strike_clause),
        ('pc', pc_clause),
        ('settle_date', settle_date_clause),
    ]
    for column_expr, fragment in optional_filters:
        if fragment is not None:
            conditions.append(f"{column_expr} {fragment}")
    sql = f"select * from {opttab} where {' and '.join(conditions)}"
    if limit is not None:
        sql += f" limit {limit}"
    sql += ';'
    print(sql)
    df = pga.get_sql(sql)
    return df




In [None]:
if ONLY_DO_SINGLE_MONTH:
    # A bare expression inside an `if` block is not echoed by the notebook,
    # so print the distinct symbols explicitly.
    print(df_single.symbol.unique())

In [None]:
#!jupyter nbconvert step_02_options_table_loader.ipynb --to python