In [2]:
from typing import Any
import pandas as pd
from pydantic import BaseModel, Field, field_validator

import logging

log = logging.getLogger(__name__)

class Operation(BaseModel):
    """Comparison operator that accepts either a symbol or a spelled-out alias.

    After validation, ``operation`` always holds one of ``<``, ``>``, ``<=``,
    ``>=``, ``==`` or ``!=``; aliases such as ``"gt"`` or ``"less than"`` are
    replaced by the matching symbol.
    """

    operation: str = Field(...)

    @field_validator("operation")
    @classmethod
    def normalize_operation(cls, v):
        """Map ``v`` to its canonical comparison symbol.

        Matching is case-insensitive and ignores leading/trailing whitespace.

        Args:
            v: The operator as supplied by the caller (a symbol or an alias).

        Returns:
            The canonical symbol, e.g. ``">"`` for ``"gt"``.

        Raises:
            ValueError: If ``v`` is neither a canonical symbol nor a known
                alias (pydantic surfaces this as a validation error).
        """
        operation_mapping = {
            "<": ["lt", "less than", "less"],
            ">": ["gt", "greater than", "greater"],
            "<=": ["lte", "less than or equal to", "less equal"],
            ">=": ["gte", "greater than or equal to", "greater equal"],
            "==": ["eq", "equal to", "equals", "equal"],
            "!=": ["ne", "not equal to", "not equals", "not equal"],
        }

        # Normalize once, outside the loop, so " GT " and "gt" are treated alike.
        candidate = v.strip().lower()
        for op, aliases in operation_mapping.items():
            if candidate == op or candidate in aliases:
                return op

        # The original (un-normalized) value is echoed back so the caller sees
        # exactly what they passed in.
        raise ValueError(f"""Invalid operation: {v}
         allowed Values:
           {operation_mapping}
        """)

In [3]:
# Quick check of the validator: the alias "gt" should come back as ">".
Operation(operation="gt")

[1;35mOperation[0m[1m([0m[33moperation[0m=[32m'>'[0m[1m)[0m

In [4]:
!uv pip install polars

[2K[2mResolved [1m1 package[0m in 197ms[0m                                                  [0m
[2K[2mDownloaded [1m1 package[0m in 3.50s[0m1                                      [0m
[2K[2mInstalled [1m1 package[0m in 20ms[0m.21                                      [0m
 [32m+[39m [1mpolars[0m[2m==0.20.21[0m


In [5]:
import polars as pl

In [6]:
# Load the RFQ CSV with polars. The path is relative, so this only works when
# the kernel's working directory is the folder this notebook lives in.
df = pl.read_csv("../include/x_flow/raw_data/DR_Demo_Bond_trading_RFQ.csv")

In [9]:
# Load the search-space CSV with polars (same relative-path caveat as the
# RFQ file: it depends on the kernel's working directory).
splits = pl.read_csv("../include/x_flow/raw_data/search_space.csv")

In [11]:
# Display the search-space table; polars elides the middle rows in the rendered output.
splits

Unnamed: 0_level_0,target_definition,forecast_distance,feature_derivation,project,project_id
i64,str,i64,i64,str,str
0,"""Regression""",1,1,"""Project(big_tr…","""6465059f6a385c…"
1,"""Regression""",1,5,"""Project(big_tr…","""646507016a385c…"
2,"""Regression""",1,15,"""Project(big_tr…","""64650bedbe7eeb…"
3,"""Regression""",2,1,"""Project(big_tr…","""64651034d067f6…"
4,"""Regression""",2,5,"""Project(big_tr…","""646511a0be7eeb…"
…,…,…,…,…,…
16,"""200""",3,5,"""Project(big_tr…","""646536e6ee2491…"
17,"""200""",3,15,"""Project(big_tr…","""64653924cefe8e…"
18,"""500""",1,1,"""Project(big_tr…","""64653e54cefe8e…"
19,"""500""",1,5,"""Project(big_tr…","""64653feeee2491…"


In [7]:
# Preview the first rows to check the column names and the dtypes polars inferred.
df.head()

request_id,date,cusip,BidAsk,Mid,yield_to_maturity,years_to_maturity,years_since_issue,is_bench_02y,is_bench_05y,is_bench_10y,is_bench_30y,IssueName,AmountOut,issue_date,Coupon,Currency,maturity_date,counterparty_id,counterparty_type,name,counterparty_aum,ann_account_value_bln,tier,salesperson,salesperson_num_ac_covered,notional_EURm,notional_bps_total_issue,log_notional,resp_bidAsk_norm,response_revenue_EUR,num_brokers,trade_won
i64,str,str,f64,f64,f64,f64,f64,bool,bool,bool,bool,str,f64,str,f64,str,str,str,str,str,f64,f64,str,str,i64,i64,f64,f64,f64,f64,i64,bool
1777,"""04/01/2018""","""D2R8H4AK""",0.0227,102.16795,-0.522589,3.263585,2.507923,False,False,False,False,"""Government of …",20713.79945,"""03/07/2015""",0.25,"""Euro""","""10/04/2021""","""LO057""","""LO""","""Fayetteville I…",81.673308,33.523547,"""C""","""Meghan""",41,29,14.000329,1.462398,2.719329,4028.957313,10,True
1778,"""06/03/2018""","""D20658Z9""",0.1442,128.7033,1.284039,29.005387,4.01651,False,False,False,True,"""Government of …",30525.59919,"""28/02/2014""",2.5,"""Euro""","""08/03/2047""","""LO111""","""LO""","""Knoxville Pens…",114.119618,30.439696,"""C""","""Meghan""",41,208,68.139531,2.318063,12.510871,167553.6102,8,False
1779,"""05/09/2018""","""D206585A""",0.028,100.987,-0.607714,2.661246,3.616775,False,False,False,False,"""Government of …",21803.99942,"""23/01/2015""",0.0,"""Euro""","""04/05/2021""","""LO140""","""LO""","""Cincinnati Cap…",210.929638,46.848707,"""A""","""Kate""",9,135,61.915247,2.130334,2.954119,20139.9049,9,False
1780,"""14/08/2019""","""D2R8H4DM""",0.023,109.0565,-0.721392,9.566247,1.086949,False,False,True,False,"""Government of …",22894.19939,"""13/07/2018""",0.25,"""Euro""","""08/03/2029""","""LO087""","""LO""","""Unalaska Advis…",117.150554,14.189058,"""C""","""William""",15,99,43.242394,1.995635,2.661497,14369.06668,11,False
1781,"""25/04/2018""","""D20659WR""",0.0242,106.5311,-0.492709,1.957604,7.685305,True,False,False,False,"""Government of …",17443.19954,"""18/08/2010""",2.25,"""Euro""","""09/04/2020""","""LO090""","""LO""","""Jackson Trust""",135.431575,50.701944,"""B""","""Louis""",3,228,130.709965,2.357935,2.299022,27923.75113,9,False


In [8]:
# List the distinct values present in the Currency column.
df["Currency"].unique()

Currency
str
"""Euro"""


In [43]:
import datarobot as dr
import inspect
import yaml

# For each DataRobot entry point, the list of parameter names its signature
# exposes, keyed by a short label for that entry point.
params_function_mapping = {
    label: list(inspect.signature(target).parameters.keys())
    for label, target in (
        ("DatetimePartitioningSpecification", dr.DatetimePartitioningSpecification),
        ("AdvancedOptions", dr.AdvancedOptions),
        ("analyze_and_model", dr.Project.analyze_and_model),
    )
}

# Inverse lookup: parameter name -> label of the entry point that accepts it.
# "self" is dropped because it is not a user-supplied argument. If the same
# parameter name occurs under several labels, the last label in dict order wins.
mapping = {
    param_name: label
    for (label, param_names) in params_function_mapping.items()
    for param_name in param_names
    if param_name != "self"
}

In [45]:
# Render the parameter -> entry-point lookup as YAML for a quick visual scan
# (keys appear alphabetically sorted in the output below).
print(yaml.dump(mapping))

accuracy_optimized_mb: AdvancedOptions
advanced_options: analyze_and_model
aggregation_type: DatetimePartitioningSpecification
allow_partial_history_time_series_predictions: DatetimePartitioningSpecification
allowed_pairwise_interaction_groups: AdvancedOptions
autopilot_cluster_list: analyze_and_model
autopilot_data_sampling_method: AdvancedOptions
autopilot_data_selection_method: DatetimePartitioningSpecification
autopilot_with_feature_discovery: AdvancedOptions
backtests: DatetimePartitioningSpecification
bias_mitigation_feature_name: AdvancedOptions
bias_mitigation_technique: AdvancedOptions
blend_best_models: AdvancedOptions
blueprint_threshold: AdvancedOptions
calendar_id: DatetimePartitioningSpecification
class_mapping_aggregation_settings: analyze_and_model
consider_blenders_in_recommendation: AdvancedOptions
credentials: analyze_and_model
cross_series_group_by_columns: DatetimePartitioningSpecification
datetime_partition_column: DatetimePartitioningSpecification
default_monoton