In [19]:
# data processing
import datetime
import logging
import os
import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union

import datarobot as dr
import polars as pl
from datarobotx.idp.autopilot import get_or_create_autopilot_run

log = logging.getLogger(__name__)


def remove_stream_handlers(logger: logging.Logger) -> int:
    """Detach every ``logging.StreamHandler`` from ``logger``.

    Subclasses of ``StreamHandler`` (for example ``FileHandler``) match the
    ``isinstance`` check and are removed as well.

    Args:
        logger: Logger whose directly attached handlers are inspected.

    Returns:
        The number of handlers that were removed.
    """
    removed = 0
    # Iterate over a snapshot: removeHandler() mutates logger.handlers, and
    # removing from the list being iterated silently skips the next handler.
    for handler in list(logger.handlers):
        if isinstance(handler, logging.StreamHandler):
            logger.removeHandler(handler)
            removed += 1
    return removed


# Drop the stream handlers attached to the DataRobot project logger.
dr_logger = logging.getLogger("datarobot.models.project")
remove_stream_handlers(dr_logger)


# Install "ignore" filters for two warning categories, in the same order as before:
# the polars inefficient-map warning first, then the DataRobot deprecation warning.
for _ignored_category in (
    pl.exceptions.PolarsInefficientMapWarning,
    dr.errors.DataRobotDeprecationWarning,
):
    warnings.filterwarnings("ignore", category=_ignored_category)



class FeatureSelectionMethod:
    """Placeholder type used to annotate the ``method`` argument of ``feature_selection``.

    The class defines no attributes or methods yet.
    """


def load(path: str) -> tuple[TrainingData, ExternalHoldoutData]:
    """Signature sketch for loading a training / external-holdout pair from ``path``.

    Not implemented: the body is empty, so calling this returns ``None``
    regardless of the annotated return type.
    """
    return None


def variable_downsampling(data: Data) -> Data:
    """Signature sketch for a downsampling step over ``data``.

    Not implemented: the body is empty, so calling this returns ``None``
    regardless of the annotated return type.
    """
    return None


def target_engineering(data: Data) -> Data:
    """Signature sketch for a target-engineering step over ``data``.

    Not implemented: the body is empty, so calling this returns ``None``
    regardless of the annotated return type.
    """
    return None


def feature_selection(data: Data, method: FeatureSelectionMethod) -> Data:
    """Signature sketch for a feature-selection step over ``data`` driven by ``method``.

    Not implemented: the body is empty, so calling this returns ``None``
    regardless of the annotated return type.
    """
    return None

In [20]:
# Show the kernel's working directory; the relative data paths used by the
# loading cell resolve against it. The explicit `%` form does not depend on
# automagic, which only applies to single-line cells with an unshadowed name.
%pwd

[32m'/home/lukas/code/testing/test_x_flow/recipe-xflow'[0m

In [21]:
# Keyword arguments shared by the training and external-holdout loads.
_shared_load_options = {
    "date_column": "date",
    "date_format": "%d/%m/%Y",
    "target_column": "Mid",
}
# NOTE(review): despite its name, `date_partition_column` receives two dates here,
# presumably partition boundaries. Confirm against TrainingData.load.
_partition_dates = [datetime.datetime(2018, 6, 3), datetime.datetime(2018, 6, 24)]

# Both paths are relative, so they resolve against the kernel's working directory.
training_data = TrainingData.load(
    load_path=Path("include/x_flow/raw_data/DR_Demo_Bond_trading_RFQ_train.csv"),
    date_partition_column=_partition_dates,
    **_shared_load_options,
)
test_data = ExternalHoldoutData.load(
    load_path=Path("include/x_flow/raw_data/DR_Demo_Bond_trading_RFQ_test.csv"),
    **_shared_load_options,
)

In [22]:
import datarobotx
from copy import deepcopy

In [23]:
from abc import ABC, abstractmethod

from utils.operator import Operator



In [24]:
from utils.fire import FIRE as FireHelper
from datarobotx.idp.autopilot import get_or_create_autopilot_run
from datarobotx.idp.datasets import get_or_create_dataset_from_df
from datarobotx.idp.common.hashing import get_hash


In [26]:
# NOTE(review): presumably turns the regression target into a flag for
# "target < 100" and drops the original target. Confirm against BinarizeData.
binarizer = BinarizeData(
    threshold=100, operator="<", binarize_drop_regression_target=True
)

# Credentials are read from the environment instead of being embedded in the
# notebook. DATAROBOT_API_TOKEN is required (KeyError if unset); the endpoint
# falls back to the public DataRobot API URL.
# SECURITY: the token previously committed in this cell must be treated as
# compromised and revoked.
fire = FIRE(
    endpoint=os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2"),
    token=os.environ["DATAROBOT_API_TOKEN"],
    reduction_method="Rank Aggregation",
)

In [27]:
# Fit the binarizer on the training data and keep the transformed result.
binarized_data = binarizer.fit_transform(training_data)
# Fit FIRE on the binarized data. As the cell's last expression, the value
# returned by fit() (the FIRE object, per the recorded output) is displayed.
fire.fit(binarized_data)

[1m<[0m[1;95m__main__.FIRE[0m[39m object at [0m[1;36m0x7f506f045df0[0m[1m>[0m

In [28]:
# Preview the first rows of the training frame.
training_data.rendered_df.head()

request_id,date,cusip,BidAsk,Mid,yield_to_maturity,years_to_maturity,years_since_issue,is_bench_02y,is_bench_05y,is_bench_10y,is_bench_30y,IssueName,AmountOut,issue_date,Coupon,Currency,maturity_date,counterparty_id,counterparty_type,name,counterparty_aum,ann_account_value_bln,tier,salesperson,salesperson_num_ac_covered,notional_EURm,notional_bps_total_issue,log_notional,resp_bidAsk_norm,response_revenue_EUR,num_brokers,trade_won
i64,datetime[μs],str,f64,f64,f64,f64,f64,bool,bool,bool,bool,str,f64,str,f64,str,str,str,str,str,f64,f64,str,str,i64,i64,f64,f64,f64,f64,i64,bool
1777,2018-01-04 00:00:00,"""D2R8H4AK""",0.0227,102.16795,-0.522589,3.263585,2.507923,False,False,False,False,"""Government of Germany 0.25% 16…",20713.79945,"""03/07/2015""",0.25,"""Euro""","""10/04/2021""","""LO057""","""LO""","""Fayetteville Investments""",81.673308,33.523547,"""C""","""Meghan""",41,29,14.000329,1.462398,2.719329,4028.957313,10,True
1778,2018-03-06 00:00:00,"""D20658Z9""",0.1442,128.7033,1.284039,29.005387,4.01651,False,False,False,True,"""Government of Germany 2.5% 15-…",30525.59919,"""28/02/2014""",2.5,"""Euro""","""08/03/2047""","""LO111""","""LO""","""Knoxville Pensions""",114.119618,30.439696,"""C""","""Meghan""",41,208,68.139531,2.318063,12.510871,167553.6102,8,False
1779,2018-09-05 00:00:00,"""D206585A""",0.028,100.987,-0.607714,2.661246,3.616775,False,False,False,False,"""Government of Germany 0.0% 17-…",21803.99942,"""23/01/2015""",0.0,"""Euro""","""04/05/2021""","""LO140""","""LO""","""Cincinnati Capital""",210.929638,46.848707,"""A""","""Kate""",9,135,61.915247,2.130334,2.954119,20139.9049,9,False
1780,2019-08-14 00:00:00,"""D2R8H4DM""",0.023,109.0565,-0.721392,9.566247,1.086949,False,False,True,False,"""Government of Germany 0.25% 15…",22894.19939,"""13/07/2018""",0.25,"""Euro""","""08/03/2029""","""LO087""","""LO""","""Unalaska Advisors""",117.150554,14.189058,"""C""","""William""",15,99,43.242394,1.995635,2.661497,14369.06668,11,False
1781,2018-04-25 00:00:00,"""D20659WR""",0.0242,106.5311,-0.492709,1.957604,7.685305,True,False,False,False,"""Government of Germany 2.25% 04…",17443.19954,"""18/08/2010""",2.25,"""Euro""","""09/04/2020""","""LO090""","""LO""","""Jackson Trust""",135.431575,50.701944,"""B""","""Louis""",3,228,130.709965,2.357935,2.299022,27923.75113,9,False


In [29]:
# Apply the binarizer to the training data again, then pass the result through
# FIRE's transform.
# NOTE(review): presumably `binarizer.transform(training_data)` reproduces the
# `binarized_data` computed by the fit cell. Confirm before reusing that variable.
reduced_data = fire.transform(binarizer.transform(training_data))

In [30]:
# Credentials are read from the environment instead of being embedded in the
# notebook. DATAROBOT_API_TOKEN is required (KeyError if unset); the endpoint
# falls back to the public DataRobot API URL.
# SECURITY: the token previously committed in this cell must be treated as
# compromised and revoked.
dr_endpoint = os.environ.get("DATAROBOT_ENDPOINT", "https://app.datarobot.com/api/v2")
dr_token = os.environ["DATAROBOT_API_TOKEN"]

# Register the reduced frame as a dataset; the helper expects a pandas frame,
# hence the conversion from the rendered (polars) frame.
dataset_id = get_or_create_dataset_from_df(
    endpoint=dr_endpoint,
    token=dr_token,
    data_frame=reduced_data.rendered_df.to_pandas(),
    name="reduced_data_dataset",
)
# Start (or reuse) an autopilot run on that dataset.
project_id = get_or_create_autopilot_run(
    endpoint=dr_endpoint,
    token=dr_token,
    dataset_id=dataset_id,
    name="reduced_data_project",
    analyze_and_model_config={
        "target": reduced_data.target_column,
        "mode": "quick",
        # NOTE(review): presumably a timeout in seconds. Confirm against the DataRobot client.
        "max_wait": 10000,
        # NOTE(review): presumably -1 requests all available workers. Confirm.
        "worker_count": -1,
    },
)


In [44]:
def p(x):
    """Return ``x + 1``."""
    return x + 1


def f(check, x):
    """Return ``p(x)`` when ``check`` is truthy, otherwise return ``check`` itself.

    Short-circuits like ``check and p(x)``: ``p`` is only called when ``check``
    is truthy, and a falsy ``check`` is returned unchanged (e.g. ``False``, ``0``).
    """
    return check and p(x)

In [45]:
# Quick check: `check` is truthy, so the result is p(4).
f(True, 4)

[1;36m5[0m

In [1]:
# NOTE(review): `catalog` is not defined anywhere in this notebook. Presumably it
# is injected by the project's IPython/Kedro extension; confirm, since this cell
# fails on a plain kernel.
# Per the recorded output, the loaded object is a dict of loader callables
# rather than a DataFrame, despite the name `df`.
df = catalog.load("experiment.backtests_grouped")

In [2]:
# Display the loaded mapping (keys are dataset paths, values are bound `load` methods).
df


[1m{[0m
    [32m'__all_data__/664f6b545985fcc9a513c951/664f6bb42574c507a9635697.csv/data'[0m: [1m<[0m[1;95mbound[0m[39m method AbstractDataset.load of <x_flow.utils.data.XFlowDataset object at [0m[1;36m0x7f3121f8f710[0m[39m>>,[0m
[39m    [0m[32m'__all_data__/664f6b545985fcc9a513c951/664f6bb42574c507a9635698.csv/data'[0m[39m: <bound method AbstractDataset.load of <x_flow.utils.data.XFlowDataset object at [0m[1;36m0x7f3166198d40[0m[39m>>,[0m
[39m    [0m[32m'__all_data__/664f6b545985fcc9a513c951/664f6bb42574c507a9635699.csv/data'[0m[39m: <bound method AbstractDataset.load of <x_flow.utils.data.XFlowDataset object at [0m[1;36m0x7f3122283470[0m[39m>>,[0m
[39m    [0m[32m'__all_data__/664f6b545985fcc9a513c951/664f6bb42574c507a963569a.csv/data'[0m[39m: <bound method AbstractDataset.load of <x_flow.utils.data.XFlowDataset object at [0m[1;36m0x7f3121e8ff80[0m[39m>[0m[1m>[0m
[1m}[0m

In [4]:
# Call the stored bound `load` method for one entry to materialise its data.
df['__all_data__/664f6b545985fcc9a513c951/664f6bb42574c507a9635697.csv/data']()


[1;35mValidationPredictionData[0m[1m([0m
    [33mdf[0m=     Unnamed: [1;36m0[0m        date  BBBI Global Aggregate  MSCI AC World  S&P GSCI  \
[1;36m0[0m            [1;36m22[0m  [1;36m2020[0m-[1;36m06[0m-[1;36m09[0m               [1;36m0.001346[0m      [1;36m-0.006076[0m  [1;36m0.008632[0m   
[1;36m1[0m            [1;36m23[0m  [1;36m2020[0m-[1;36m06[0m-[1;36m10[0m               [1;36m0.001401[0m      [1;36m-0.003265[0m  [1;36m0.009312[0m   
[1;36m2[0m            [1;36m23[0m  [1;36m2020[0m-[1;36m06[0m-[1;36m10[0m               [1;36m0.001401[0m      [1;36m-0.003265[0m  [1;36m0.009312[0m   
[1;36m3[0m            [1;36m23[0m  [1;36m2020[0m-[1;36m06[0m-[1;36m10[0m               [1;36m0.001401[0m      [1;36m-0.003265[0m  [1;36m0.009312[0m   
[1;36m4[0m            [1;36m24[0m  [1;36m2020[0m-[1;36m06[0m-[1;36m11[0m               [1;36m0.002412[0m      [1;36m-0.045679[0m [1;36m-0.038599[0m   
..          