In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import sys
import datetime


sys.path.append('..')
from utils.tte import tte


# Directory with the processed CL CSV files, relative to this notebook.
DATA_DIR = Path("../data/cl/processed")

# Front-month series in three flavours, named after the files they come from:
# unadjusted, Panama-adjusted and proportionally adjusted.
rolled_raw = pd.read_csv(DATA_DIR.joinpath("cl_front_month_raw.csv"))
panama_rolled = pd.read_csv(DATA_DIR.joinpath("cl_front_month_panama_adjusted.csv"))
proportional_rolled = pd.read_csv(DATA_DIR.joinpath("cl_front_month_proportional_adjusted.csv"))

# Spread quotes, one row per event time and spread symbol.
spreads = pd.read_csv(DATA_DIR.joinpath("cl_spreads.csv"))


In [None]:
# Load the per-contract bars. The first CSV column becomes a parsed datetime
# index; the sort below refers to it by the name "ts_event" and reset_index()
# then turns it back into a regular column.
# The path reuses DATA_DIR (defined in the first cell) instead of repeating
# the directory literal, so the data location is configured in one place.
raw = pd.read_csv(DATA_DIR / "cl_raw.csv", index_col=0, parse_dates=True)

# Split each symbol into its leading letters (column 0) and trailing digits
# (column 1); the last letter of the prefix is kept as the month code.
symbol_parts = raw["symbol"].str.extract(r"([a-zA-Z]+)([0-9]+)")

raw = (
    raw.assign(
        letters=symbol_parts[0],
        number=symbol_parts[1].astype(int),
        month=symbol_parts[0].str[-1],
    )
    # Order by event time, then by year digit(s), then by the letter prefix.
    .sort_values(by=["ts_event", "number", "letters"])
    .reset_index()
    # `letters` was only needed as a sort key.
    .drop(columns=["letters"])
)

In [None]:
# Inspect the parsed symbol components next to event time and volume.
raw.loc[:, ["ts_event", "symbol", "volume", "month", "number"]]

In [None]:
# `number` holds only the trailing year digit(s) of the symbol, so the decade
# is inferred from the observation date.
event_year = raw["ts_event"].dt.year
decade_start = event_year // 10 * 10
# If placing the digit in the current decade lands before the observation
# year, the contract is assigned to the following decade instead.
expires_next_decade = decade_start + raw["number"] < event_year
raw["expiration_year"] = np.where(expires_next_decade, decade_start + 10, decade_start) + raw["number"]  # type: ignore

In [None]:
# Month codes in calendar order: "F" maps to month 01 ... "Z" to month 12.
month_codes = "FGHJKMNQUVXZ"

# Month code -> "-MM-20" suffix, appended to a four-digit year to form a date
# string on day 20 of that month.
conversions = dict(
    zip(
        month_codes,
        (
            "-01-20", "-02-20", "-03-20", "-04-20",
            "-05-20", "-06-20", "-07-20", "-08-20",
            "-09-20", "-10-20", "-11-20", "-12-20",
        ),
    )
)

#pattern = rf'^CL[{month_codes}]\d$'

# Concatenate the inferred year with the month code's "-MM-20" suffix and
# parse the result as a UTC timestamp.
# NOTE(review): day 20 is a fixed stand-in for the expiration day — confirm
# against the actual contract calendar.
expiry_text = raw["expiration_year"].astype(str) + raw["month"].map(conversions)
raw["expiration_date"] = pd.to_datetime(expiry_text).dt.tz_localize("UTC")  # type: ignore
raw


In [None]:
# Time to expiration in years: whole days between the observation and the
# expiration date, divided by 365.25.
days_to_expiry = (raw["expiration_date"] - raw["ts_event"]).dt.days  # type: ignore
raw["tte"] = days_to_expiry / 365.25

In [None]:
#raw = raw.drop(columns=["number", "month", "expiration_year"])
# Inspect the spread quotes before their symbols are parsed.
spreads.loc[:, ["ts_event", "symbol", "close", "volume"]]


In [None]:
# Patterns for a single-contract symbol: "CL", one month code, one digit.
# NOTE(review): both patterns are identical and neither is used in the cells
# visible here — confirm whether `second_month` was meant to differ.
first_month = rf'^CL[{month_codes}]\d$'
second_month = rf'^CL[{month_codes}]\d$'

# Split each spread symbol ("CL<month><digit>-CL<month><digit>") into the
# month code and year digit of its two legs.
spreads[["month_from", "year_from", "month_to", "year_to"]] = spreads["symbol"].str.extract(rf"CL([{month_codes}])([0-9])-CL([{month_codes}])([0-9])")

# Keep only the columns used downstream. The explicit copy makes the result an
# independent frame, so the column assignments below (and in later cells) do
# not raise SettingWithCopyWarning.
spreads = spreads[["ts_event", "symbol", "close", "volume", "month_from", "year_from", "month_to", "year_to"]].copy()

# The extracted year digits are strings; convert them for arithmetic.
spreads = spreads.astype({"year_from": int, "year_to": int})
spreads["ts_event"] = pd.to_datetime(spreads["ts_event"])

In [None]:

#spreads["from_date"] = pd.to_datetime(((np.where((spreads["ts_event"].dt.year//10 *10)+spreads["year_from"]<spreads["ts_event"].dt.year,2020,2010) + spreads["year_from"]).astype(str)+ spreads["month_from"].map(conversions))).dt.tz_localize('UTC') #type: ignore
#spreads["to_date"] = pd.to_datetime(((np.where((spreads["ts_event"].dt.year//10 *10)+spreads["year_to"]<spreads["ts_event"].dt.year,2020,2010) + spreads["year_to"]).astype(str)+ spreads["month_to"].map(conversions))).dt.tz_localize('UTC') #type: ignore
#
#spreads["year"] = spreads["ts_event"].dt.year//10 * 10 #type: ignore
#spreads["number"] = spreads["year_from"]
#spreads["test_year"] = spreads["year"] + spreads["number"]
#spreads["test2"] = np.where(spreads["test_year"]<spreads["ts_event"].dt.year, spreads["test_year"]+10, spreads["test_year"]) #type: ignore
#spreads[["ts_event", "tte", "year", "year_from", "test_year", "test2", "from_date"]]


In [None]:


def _leg_date(ts_event, year_digit, month_code):
    """Return the UTC "day 20" date of one spread leg.

    The symbol carries a single year digit, so the decade is taken from the
    observation year; if that would place the leg before the observation
    year, the following decade is used instead.

    Parameters
    ----------
    ts_event : datetime Series of observation times.
    year_digit : integer Series with the leg's year digit.
    month_code : Series of month codes, mapped through `conversions`.
    """
    event_year = ts_event.dt.year
    decade_start = event_year // 10 * 10
    expires_next_decade = decade_start + year_digit < event_year
    full_year = np.where(expires_next_decade, decade_start + 10, decade_start) + year_digit
    return pd.to_datetime(full_year.astype(str) + month_code.map(conversions)).dt.tz_localize("UTC")  # type: ignore


# Same rule for both legs; only the input columns differ.
spreads["from_date"] = _leg_date(spreads["ts_event"], spreads["year_from"], spreads["month_from"])
spreads["to_date"] = _leg_date(spreads["ts_event"], spreads["year_to"], spreads["month_to"])

# Tenor: distance between the two legs. tte: distance from the observation to
# the second (`to`) leg. Both are whole days divided by 365.25.
spreads["tenor"] = (spreads["to_date"] - spreads["from_date"]).dt.days / 365.25  # type: ignore
spreads["tte"] = (spreads["to_date"] - spreads["ts_event"]).dt.days / 365.25  # type: ignore

# Reassign instead of sorting in place so the cell produces a fresh frame.
spreads = (
    spreads
    .sort_values(by=["ts_event", "year_from", "month_from", "tenor"])
    .reset_index(drop=True)
)
spreads


In [None]:
# Drop everything observed before 2011-11-27 (UTC) and renumber the rows.
on_or_after_start = raw["ts_event"] >= pd.Timestamp("2011-11-27", tz="UTC")
raw = raw.loc[on_or_after_start].reset_index(drop=True)
raw.loc[:, ["ts_event", "symbol", "close", "volume", "month", "expiration_date", "tte"]]

In [None]:
# Outright contracts from 2011-11-27 (UTC) onwards, reduced to the columns
# shared with the spread frame built in the next cell.
df = raw.loc[
    raw["ts_event"] >= pd.Timestamp("2011-11-27", tz="UTC"),
    ["ts_event", "symbol", "close", "volume", "month", "number", "tte"],
]
df

In [None]:
# Spreads from 2011-11-27 (UTC) onwards. The first-leg columns are renamed to
# `month` / `number` so the frame lines up column-for-column with `df`.
dfs = spreads.loc[
    spreads["ts_event"] >= pd.Timestamp("2011-11-27", tz="UTC"),
    ["ts_event", "symbol", "close", "volume", "month_from", "year_from", "tte"],
].rename(columns={"month_from": "month", "year_from": "number"})
dfs

In [None]:
# Stack outrights and spreads, then collapse to one row per (ts_event, tte).
# Rows are ordered by ascending tte and descending volume first, so within
# each group the highest-volume instrument comes first. Note that
# GroupBy.first() takes the first non-null value of every column.
target = (
    pd.concat([df, dfs], ignore_index=True)
    .sort_values(by=["tte", "volume"], ascending=[True, False])
    .reset_index(drop=True)
    .groupby(["ts_event", "tte"], as_index=False)
    .first()
)
target.loc[:, ["ts_event", "symbol", "volume", "tte"]]

In [None]:
def nelson_siegel(tau, beta0, beta1, beta2, lambda_):
    """Evaluate the Nelson-Siegel curve at maturity ``tau``.

    With ``x = tau / lambda_``::

        term1 = (1 - exp(-x)) / x
        term2 = term1 - exp(-x)
        value = beta0 + beta1 * term1 + beta2 * term2

    Parameters
    ----------
    tau : scalar, numpy array or pandas Series of maturities.
    beta0, beta1, beta2 : coefficients of the constant, ``term1`` and
        ``term2`` components.
    lambda_ : scale applied to ``tau``; must be non-zero.

    Returns
    -------
    Curve value(s) with the same shape/type as ``tau / lambda_``.

    Notes
    -----
    At ``tau == 0`` the formula is 0/0. The analytic limit is used instead
    (``term1 -> 1``, ``term2 -> 0``), so the result there is
    ``beta0 + beta1`` rather than NaN.
    """
    x = tau / lambda_
    at_zero = x == 0

    # Replace x = 0 by 1 before dividing (bool adds as 0/1), so no 0/0 is ever
    # evaluated; the placeholder result is overwritten just below.
    safe_x = x + at_zero
    # -expm1(-x) equals 1 - exp(-x) but avoids cancellation for small x.
    term1 = -np.expm1(-safe_x) / safe_x
    # Where x == 0, move term1 to its limit value of exactly 1.
    term1 = term1 + at_zero * (1.0 - term1)

    term2 = term1 - np.exp(-x)
    return beta0 + beta1 * term1 + beta2 * term2





In [None]:
def objective(x, tau, close):
    """Sum of squared errors between a Nelson-Siegel curve and ``close``.

    Parameters
    ----------
    x : sequence of four values ``(beta0, beta1, beta2, lambda_)``.
    tau : maturities at which the curve is evaluated.
    close : observed values, one per entry of ``tau``.

    Returns
    -------
    float
        ``sum((close - nelson_siegel(tau, *x)) ** 2)``.

    NOTE(review): the original body returned an undefined ``loss``; sum of
    squared residuals is assumed here — confirm this is the intended loss.
    """
    beta0, beta1, beta2, lambda_ = x

    fitted = nelson_siegel(tau, beta0, beta1, beta2, lambda_)
    # Compare as plain float arrays so pandas index alignment cannot reorder
    # or drop observations.
    residuals = np.asarray(close, dtype=float) - np.asarray(fitted, dtype=float)
    loss = float(np.sum(residuals ** 2))
    return loss