In [None]:
import pandas as pd

from sysinit.futures.adjustedprices_from_db_multiple_to_db import process_adjusted_prices_single_instrument
from sysinit.futures.multipleprices_from_db_prices_and_csv_calendars_to_db import process_multiple_prices_single_instrument
from sysinit.futures.norgate_futures_contract_prices import (
    rename_files,
    check_prices_match, 
    transfer_norgate_prices_to_db_single, 
    transfer_norgate_prices_to_db_single_contract
)
from sysinit.futures.rollcalendars_from_arcticprices_to_csv import build_and_write_roll_calendar
from sysobjects.dict_of_futures_per_contract_prices import dictFuturesContractPrices
from sysobjects.futures_per_contract_prices import futuresContractPrices
from sysproduction.data.prices import diagPrices
from sysproduction.update_sampled_contracts import update_sampled_contracts
from sysdata.csv.csv_roll_parameters import csvRollParametersData
from sysobjects.roll_calendars import rollCalendar
from syscore.pandas.pdutils import print_full

# Shared diagPrices handle; later cells read the db price stores from its attributes.
diag_prices = diagPrices()
# Root folder of the Norgate CSV files; later cells append "/Futures" and "/Futures_conv" to it.
# The two commented-out lines below are alternative locations.
# NOTE(review): get_production_config is not imported in this cell — import it before enabling the first alternative.
#norgate_path = get_production_config().get_element_or_arg_not_supplied("norgate_path")
#norgate_path = "/Users/ageach/Documents/backup/pst_jani/norgate/"
norgate_path = "/home/alpha/data/norgate/"

# barchart import
from sysdata.csv.csv_futures_contract_prices import csvFuturesContractPriceData
from sysinit.futures.barchart_futures_contract_prices import (
    BARCHART_CONFIG,
    transfer_barchart_prices_to_db_single,
    transfer_barchart_prices_to_db_single_contract,
)
# Folder holding the Barchart CSV files; passed as `datapath` to the Barchart reader and import calls below.
barchart_path = "/home/alpha/data/barchart"


# todo
# check csv's, spike etc.: COPPER,DJSTX-SMALL,OATIES,AEX,MSCIEAFA,CHFJPY,CNH,ETHANOL,EUA,BOVESPA,CHINAA-CON,EPRA-EUROPE,JP-REALESTATE,OMX,BB3M
# MSCISING update sampled contracts crashes on missing 20220900 contract
# LEANHOG compare expected roll calendar with rob's
# JPY divide/multiply by 100 to match to IB prices
# RICE norgate has couple years more data, Norgate data needs to be divided by 100
# ETHEREUM, MSCITAIWAN, SONIA3 rename doesn't find files, which are there???
# CRUDE_W roll cal broken
# CAC check prices


# instruments added
# 2024-10-08: SP500_micro, CORN, BUND, EUROSTX, NASDAQ_micro, WHEAT, LIVECOW, GBP, SMI, US10, US20, US5, BTP, BITCOIN, SOYOIL, REDWHEAT, FEEDCOW
# 2024-10-09: DX, CHF, DOW, RUSSELL, SUGAR_WHITE, SUGAR11, GILT, BUXL, GOLD, GOLD_micro, NASDAQ, NASDAQ_micro
# 2024-10-10: KOSPI, KOSPI_mini, EUR, EUR_micro
# 2024-10-11: COCOA, COFFEE, OJ, CAD
# 2024-10-12: FTSE100, SP400
# 2024-10-14: GICS, ROBUSTA, GASOIL
# 2024-10-15: US10U, US30

# Instrument code used by the cells below, and the Norgate ticker passed to rename_files.
# Edit both before working on a different instrument.
INSTR = 'US30'
NORGATE_TICKER = 'UB'
# Echo the selection so it is visible in the cell output.
print(INSTR)

In [None]:
## View individual Barchart CSV price files in one plot

In [None]:
# Load the merged Barchart CSV prices for INSTR and chart the final prices of all contracts together.
prices = csvFuturesContractPriceData(datapath=barchart_path, config=BARCHART_CONFIG)
price_dict = prices.get_merged_prices_for_instrument(INSTR)

# Copy into a plain dict, then re-wrap each entry in the project's per-contract price type.
contract_data = {contractid: data for contractid, data in price_dict.items()}
all_contract_data = dictFuturesContractPrices(
    [
        (contractid, futuresContractPrices(data))
        for contractid, data in contract_data.items()
    ]
)
contract_prices_final = all_contract_data.final_prices()

# Combine the final-price series side by side (axis=1) for plotting.
all_finals = pd.concat(contract_prices_final, axis=1)
# all_finals = all_finals["2022-03-01":"2023-02-01"] # slice by date
#all_finals = all_finals[["20230900","20240300"]] # slice by contract
all_finals.plot(
    title=f"Individual Barchart CSV contract prices for {INSTR}",
    figsize=(15, 9),
    grid=True,
    lw=1,
    legend=False,
)

In [None]:
## import an instrument from Barchart CSV files

In [None]:
# Transfer the Barchart CSV prices found under barchart_path for one instrument.
# NOTE(review): the instrument code is hard-coded here rather than taken from INSTR — confirm this is intended before running.
transfer_barchart_prices_to_db_single("MSCISING", datapath=barchart_path)

In [None]:
## Import single contract from Barchart CSV files

In [None]:
# Transfer the Barchart CSV prices found under barchart_path for a single contract of one instrument.
# NOTE(review): both the instrument code and the contract id are hard-coded rather than taken from INSTR — confirm before running.
transfer_barchart_prices_to_db_single_contract("MSCISING", "20220900", datapath=barchart_path)

## renaming files for a single instrument from Norgate to PST format
- this moves the files from `norgate_path/Futures` to `norgate_path/Futures_conv` 

In [None]:
# Rename the Norgate files for NORGATE_TICKER found under <norgate_path>/Futures.
# dry_run=False: presumably the rename is really performed rather than only reported — confirm in rename_files.
path_str = f"{norgate_path}/Futures"
rename_files(path_str, NORGATE_TICKER, dry_run=False)

## Check that IB and Norgate/Barchart prices match
- sometimes one is in dollars, the other cents etc
- If there is a mismatch, make a record of the instrument code and skip it for now. We will come back to it

In [None]:
# Run the price comparison for INSTR, contract 20240900, against the converted files in <norgate_path>/Futures_conv.
# The contract id is hard-coded; change it to a contract that exists for the instrument being checked.
path_str = f"{norgate_path}/Futures_conv"
check_prices_match(path_str, INSTR, "20240900")

## import Norgate prices
- imports from CSV to parquet

In [None]:
# Import the converted Norgate CSV prices from <norgate_path>/Futures_conv.
path_str = f"{norgate_path}/Futures_conv"

# Written as a loop so that more instrument codes can be added to the list; currently only INSTR is imported.
for instr in [INSTR]:
    transfer_norgate_prices_to_db_single(instr, datapath=path_str)

## Import CSV prices for a single contract into parquet

In [None]:
# Import the Norgate CSV prices for one contract of INSTR.
# The source folder is set here explicitly instead of reusing whatever `path_str` an
# earlier cell left behind: the rename cell points it at ".../Futures", which would make
# this import read the unconverted files if the cells were run out of order.
path_str = f"{norgate_path}/Futures_conv"
# The contract id is hard-coded; change it to the contract that needs importing.
transfer_norgate_prices_to_db_single_contract(INSTR, '20221200', datapath=path_str)

## check contract prices
- view each individual contract price series on a plot

In [None]:
# Chart the final prices of every stored contract for INSTR, read from the db price store.
prices = diag_prices.db_futures_contract_price_data
price_dict = prices.get_merged_prices_for_instrument(INSTR)

# Copy into a plain dict, then re-wrap each entry in the project's per-contract price type.
contract_data = {contractid: data for contractid, data in price_dict.items()}
all_contract_data = dictFuturesContractPrices(
    [
        (contractid, futuresContractPrices(data))
        for contractid, data in contract_data.items()
    ]
)
contract_prices_final = all_contract_data.final_prices()

# Combine the final-price series side by side (axis=1) for plotting.
all_finals = pd.concat(contract_prices_final, axis=1)
#all_finals = all_finals["2023-09-01":"2024-02-01"] # slice by date
#all_finals = all_finals[["20220900","20220900"]] # slice by contract
all_finals.plot(
    title=f"Individual contract prices for {INSTR}",
    figsize=(15, 9),
    grid=True,
    lw=1,
    legend=False,
)

## view expected roll calendar

In [None]:
# Build and print an approximate roll calendar for INSTR from its final contract prices
# and its CSV roll parameters. `prices` must already have been set by an earlier cell.
rollparameters = csvRollParametersData()
roll_parameters_object = rollparameters.get_roll_parameters(INSTR)

# Merged per-contract prices reduced to their final prices in one step.
prices_dict = prices.get_merged_prices_for_instrument(INSTR).final_prices()

approx_roll_calendar = rollCalendar.create_approx_from_prices(
    prices_dict,
    roll_parameters_object,
)

print(f"Approx roll calendar for: {INSTR}")
print_full(approx_roll_calendar)

## build roll calendar

In [None]:
# Build the roll calendar for INSTR and write it under roll_calendar_output.
# The commented-out paths are alternative output folders.
roll_calendar_output = "/home/alpha/data/futures/roll_calendars_csv"
#roll_calendar_output = "/home/alpha/pysystemtrade/data/futures/roll_calendars_csv"
#roll_calendar_output = "/Users/ageach/Dev/work/pst_jk/data/futures/roll_calendars_csv"

# check_before_writing=False: presumably skips a confirmation step before the file is written — confirm in build_and_write_roll_calendar.
roll_cal = build_and_write_roll_calendar(
    INSTR,
    output_datapath=roll_calendar_output,
    check_before_writing=False
)

In [None]:
## view the prices for two contracts side by side
- this can help resolve issues like "Couldn't find matching roll date for contracts...."
- enter the two offending contract keys, `earlier_contract_key` and `later_contract_key`, then run the cell

In [None]:
# Print the final prices of two contracts of INSTR next to each other.
# Edit the two keys below, then run the cell. `prices` must already have been set by an earlier cell.
earlier_contract_key = "20220900"
later_contract_key = "20221200"

db_prices_dict = prices.get_merged_prices_for_instrument(INSTR)

# Pull the final-price series for both contracts, earlier one first.
earlier, later = (
    db_prices_dict[contract_key].return_final_prices()
    for contract_key in (earlier_contract_key, later_contract_key)
)

# Align the two series on their index and label each column with its contract key.
combined = pd.concat([earlier, later], axis=1)
combined.columns = [earlier_contract_key, later_contract_key]

print_full(combined)

## Check resulting roll calendar, editing if necessary
- look at the actual roll calendar CSV file
- first check the pattern of contract keys per roll date
  - you can compare against the one provided: the contract keys for a particular date in the past should be the same
  - for example, the second-to-last row in Rob's calendar was for 2022-03-10
    - the pattern was 20220300,20220600,20220600. The new one should be the same
- next check the last row. The date on the last row sometimes gets adjusted to the last available price row
  - if that happens, edit to be what the date sequence says it should be
    - for example: the last row for SP500_micro has date of 2024-09-06. That's because the prices were downloaded around 7 September 2024
    - edit the last row to be 2024-09-11 (just look at the previous year's September row)
- finally, if there isn't a row at the end for the next future roll date, add one
  - so for SP500_micro, there should be a future row for around 10 December 2024. But there isn't, so add it
  - copy the row for December 2023, and add it to the end. Then increment each of the years
    - so '2023-12-11,20231200,20240300,20240300' becomes '2024-12-11,20241200,20250300,20250300'


## Create multiple prices

In [None]:
# Echo the instrument, then create multiple prices for it.
print(INSTR)
# ADD_TO_CSV=False: presumably no CSV copy is written at this stage — confirm in process_multiple_prices_single_instrument.
process_multiple_prices_single_instrument(INSTR, ADD_TO_CSV=False)

## Check multiple prices
- check start and end dates, no gaps, etc.

In [None]:
# Echo the instrument, then chart its multiple prices as read from the db store.
print(INSTR)
db_multiple_prices = diag_prices.db_futures_multiple_prices_data
instrument_multiple_prices = db_multiple_prices.get_multiple_prices(INSTR)
#instrument_multiple_prices = instrument_multiple_prices["2023-07-01":"2023-12-31"] # slice by date
instrument_multiple_prices.plot(
    figsize=(15, 9),
    legend=False,
    lw=1,
    grid=True,
)

## generate adjusted prices

In [None]:
# Generate adjusted prices for INSTR.
# ADD_TO_CSV=False: presumably no CSV copy is written at this stage — confirm in process_adjusted_prices_single_instrument.
process_adjusted_prices_single_instrument(INSTR, ADD_TO_CSV=False)

## check adjusted prices

In [None]:
# Echo the instrument, then chart its adjusted price series as read from the db store.
print(INSTR)
db_adjusted_prices = diag_prices.db_futures_adjusted_prices_data
# Plot the instrument printed above. This call was previously hard-coded to "VNKI",
# so the chart did not correspond to the instrument being worked on.
db_adjusted_prices.get_adjusted_prices(INSTR).plot(figsize=(15,9), legend=False, lw=1, grid=True)

## Update sampled contracts

In [None]:
# Sampled contracts are updated from a terminal, not from this kernel. The original cell
# held the shell-prompt line below verbatim, which is not valid Python and raised a SyntaxError.
# Run in a shell:
#     python pysystemtrade/sysproduction/update_sampled_contracts.py
# In-kernel alternative, left disabled as in the original cell:
# update_sampled_contracts()

In [None]:
# Re-run adjusted price generation for INSTR, this time with ADD_TO_CSV=True.
# Presumably this also writes the result to CSV — confirm in process_adjusted_prices_single_instrument.
process_adjusted_prices_single_instrument(INSTR, ADD_TO_CSV=True)