In [None]:
# Install the notebook's dependencies into the kernel's environment:
# scikit-hts (with its "prophet" extra), the Calcbench API client and plotly.
# NOTE(review): no versions are pinned here, so a fresh install may pull
# releases that differ from the ones this notebook was written against.
%pip install scikit-hts[prophet] calcbench-api-client plotly

In [None]:
from datetime import datetime
from hts import HTSRegressor
from hts.utilities.load_data import load_hierarchical_sine_data
import hts.functions
import calcbench as cb
import pandas as pd
from collections import defaultdict

In [None]:
# Ask Calcbench for the ticker symbols belonging to the "DJIA" index; these
# are the companies whose data is requested further down.
tickers = cb.tickers(index="DJIA")

In [None]:
# Fetch the complete annual history of two standardized metrics for every
# ticker: GrossProfit (the series to forecast) and SIC_Code (used later to
# group companies into the middle level of the hierarchy).
api_data = cb.standardized_data(
    metrics=["GrossProfit", "SIC_Code"],
    company_identifiers=tickers,
    period_type="annual",
    all_history=True,
)

In [None]:
# --- One-digit SIC code per ticker -------------------------------------------
# The SIC codes are read from the first row of the "SIC_Code" sub-frame (one
# value per ticker). Integer division by 1000 keeps only the leading digit of
# the four-digit code, and the result is stringified so it can be used as a
# node label in the hierarchy.
one_digit_SIC_codes = pd.to_numeric(
    api_data["SIC_Code"].iloc[0] // 1000, downcast="integer"
).astype(str)
one_digit_SIC_codes.name = "SIC_Code"

# --- Gross profit in long form, indexed by (period, ticker, SIC_Code) --------
d = api_data.drop(columns="SIC_Code", level="metric")

# Drop the first *row* (not a column): it is the row the SIC codes were read
# from above -- the "0000" entry left over from SIC_Code.
d = d.drop(d.index[0])

# stack() moves the innermost column level into the index (presumably the
# ticker level -- verify against the API's column layout); the join then
# attaches each ticker's one-digit SIC code, which becomes a third index level.
d = d.stack().join(one_digit_SIC_codes).set_index("SIC_Code", append=True)

# Convert the first index level from periods to timestamps.
d.index = d.index.set_levels(d.index.levels[0].to_timestamp(), level=0)

# --- Parent -> children mapping: total -> SIC code -> "<SIC>_<ticker>" -------
# Sets de-duplicate the (ticker, SIC code) pairs that repeat for every period.
heirarchy_sets = defaultdict(set)
for _, ticker, SIC_code in d.index.to_list():
    heirarchy_sets["total"].add(SIC_code)
    heirarchy_sets[SIC_code].add(f"{SIC_code}_{ticker}")

# sorted() rather than list(): iteration order of a set of strings depends on
# per-process hash randomization, so list() could order the children
# differently after every kernel restart. Sorting keeps the hierarchy (and
# anything derived from its node order) reproducible across runs.
heirarchy = {key: sorted(values) for key, values in heirarchy_sets.items()}

# Turn the index levels back into ordinary columns for the next step.
d = d.reset_index()

In [None]:
# Build the hierarchical frame from the long-form data. The call returns three
# values; only the first (the frame, kept as `ht`) is used, the other two are
# discarded. The function name is spelled exactly as it is called here.
ht, _, _ = hts.functions.get_hierarchichal_df(
    d,
    level_names=["SIC_Code", "ticker"],
    hierarchy=[["SIC_Code"], ["ticker"]],
    date_colname="period",
    val_colname="GrossProfit",
)

In [None]:
# HTS requires the index to carry an explicit frequency, so set it by hand:
# "AS-JAN" is annual, anchored at the start of January.
ht.index.freq = "AS-JAN"

In [None]:
# Configure the hierarchical regressor: a Prophet model for the individual
# series, with the forecasts reconciled using the "OLS" revision method.
clf = HTSRegressor(
    model="prophet",
    revision_method="OLS",
    n_jobs=0,
)

In [None]:
# Fit the regressor on the hierarchical frame `ht`, using the parent -> children
# mapping `heirarchy` (built in the data-preparation cell) as the tree layout.
model = clf.fit(ht, heirarchy)

In [None]:
# Forecast four annual steps beyond the training data.
# The frequency is "AS-JAN" to match the frequency set on the training index:
# the original 'A' (year-end) would stamp the forecast rows on Dec 31 while the
# history sits on Jan 1, so the first forecast would appear to share a calendar
# year with the last observation.
# NOTE: despite the variable name, the model configured in this notebook is
# Prophet, not auto-ARIMA; the name is kept because a later cell refers to it.
predicted_autoarima = model.predict(steps_ahead=4, freq="AS-JAN")

In [None]:
# Display the forecast result as the cell's output.
predicted_autoarima