# SB Results: Bad Bids 

## Imports

In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import pathlib
import warnings
warnings.filterwarnings("ignore")

import pathlib
import pandas as pd
from pathlib import Path
import numpy as np
from datetime import timedelta

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.offline as pyo



from src.download import download_experiment_data, download_recprice_data, download_order_data, download_bid_data
from src.metrics import calculate_absolute_metrics, get_switchback_results, calculate_ratio_metrics
from src.prepare import prepare_recprice_data, prepare_order_data, prepare_bid_data, get_orders_with_recprice_df, get_hex, add_algo_name_new
from src.visualization import plot_conversions_by_time, plot_prices_by_time, plot_times_by_time
from src.visualization import plot_metric_by_time, plot_metric_by_hex, plot_switches_matrix
from src.distributions import plot_density_anime, plot_density, plot_density_simple, plot_density_overlay
from src.heatmap import plot_heatmap


## __Panama [, 5548]__

- https://ab.aws.indriverapp.com/switchbacks/city/3366/

### Parameters

Essential metrics

In [10]:
from src.metrics import METRIC_LIST

# Metric definitions that are excluded from ESSENTIAL_METRIC_LIST below.
# Every entry is a 3-item list of strings; judging by the names the layout looks like
# [ratio metric, numerator column, denominator column] — TODO confirm against src.metrics.
# NOTE(review): cp2bid and cp2start_price_bid use the same two source columns here —
# confirm this is intended.
NOT_ESSENTIAL_METRIC_LIST = [
    # market
    ["cp2bid", "start_price_bid_orders_count", "calcprices_count"],
    ["cp2start_price_bid", "start_price_bid_orders_count", "calcprices_count"],
    ["cp2done", "rides_count", "calcprices_count"],
    ['tenders_per_order', 'tenders_count', 'orders_count'],    
    ['tenders_per_driver', 'tenders_count', 'drivers_count'],
    ['tenders_per_order_with_bid', 'tenders_count', 'orders_with_bids_count'],
    
    # prices
    ['price_bid_option_other1', 'bids_option_other1_bid_price_currency_sum', 'bids_option_other1_count'],
    ['price_bid_option_other2', 'bids_option_other2_bid_price_currency_sum', 'bids_option_other2_count'],
    ['price_bid_option_other3', 'bids_option_other3_bid_price_currency_sum', 'bids_option_other3_count'],
    ["price_tender_usd", "price_tender_usd_sum", "tenders_count"],
    
    # orders
    # order2bid
    ['order2bid_option_other1', 'orders_with_bid_option_other1_count', 'orders_count'],
    ['order2bid_option_other2', 'orders_with_bid_option_other2_count', 'orders_count'],
    ['order2bid_option_other3', 'orders_with_bid_option_other3_count', 'orders_count'],
    # order2accept
    ['order2accept_option_other1', 'accepted_bids_option_other1_count', 'orders_count'],
    ['order2accept_option_other2', 'accepted_bids_option_other2_count', 'orders_count'],
    ['order2accept_option_other3', 'accepted_bids_option_other3_count', 'orders_count'],
    # order2done
    ['order2done_bid_option_other1', 'rides_by_bid_option_other1_count', 'orders_count'],
    ['order2done_bid_option_other2', 'rides_by_bid_option_other2_count', 'orders_count'],
    ['order2done_bid_option_other3', 'rides_by_bid_option_other3_count', 'orders_count'],
    # bid2accept
    ['bid_option_other12accept', 'accepted_bids_option_other1_count', 'orders_with_bid_option_other1_count'],
    ['bid_option_other22accept', 'accepted_bids_option_other2_count', 'orders_with_bid_option_other2_count'],
    ['bid_option_other32accept', 'accepted_bids_option_other3_count', 'orders_with_bid_option_other3_count'],
    # accept2done
    ['accepted_bids_option_other12done', 'rides_by_bid_option_other1_count', 'accepted_bids_option_other1_count'],
    ['accepted_bids_option_other22done', 'rides_by_bid_option_other2_count', 'accepted_bids_option_other2_count'],
    ['accepted_bids_option_other32done', 'rides_by_bid_option_other3_count', 'accepted_bids_option_other3_count'],
    # bid2done
    ['bid_option_other12done', 'rides_by_bid_option_other1_count', 'orders_with_bid_option_other1_count'],
    ['bid_option_other22done', 'rides_by_bid_option_other2_count', 'orders_with_bid_option_other2_count'],
    ['bid_option_other32done', 'rides_by_bid_option_other3_count', 'orders_with_bid_option_other3_count'],

    # bids
    # options shares
    ['bids_option_startprice_share', 'bids_option_startprice_count', 'bids_count'],
    ['bids_option_option1_share', 'bids_option_option1_count', 'bids_count'],
    ['bids_option_option2_share', 'bids_option_option2_count', 'bids_count'],
    ['bids_option_option3_share', 'bids_option_option3_count', 'bids_count'],
    ['bids_option_other1_share', 'bids_option_other1_count', 'bids_count'],
    ['bids_option_other2_share', 'bids_option_other2_count', 'bids_count'],
    ['bids_option_other3_share', 'bids_option_other3_count', 'bids_count'],
    ## share of accepted 
    ['accepted_bids_share', 'accepted_bids_count', 'bids_count'],
    ['accepted_share_bid_option_startprice', 'accepted_bids_option_startprice_count', 'bids_option_startprice_count'],
    ['accepted_share_bid_option_option1', 'accepted_bids_option_option1_count', 'bids_option_option1_count'],
    ['accepted_share_bid_option_option2', 'accepted_bids_option_option2_count', 'bids_option_option2_count'],
    ['accepted_share_bid_option_option3', 'accepted_bids_option_option3_count', 'bids_option_option3_count'],
    ['accepted_share_bid_option_other1', 'accepted_bids_option_other1_count', 'bids_option_other1_count'],
    ['accepted_share_bid_option_other2', 'accepted_bids_option_other2_count', 'bids_option_other2_count'],
    ['accepted_share_bid_option_other3', 'accepted_bids_option_other3_count', 'bids_option_other3_count'],
    ## share of done 
    ['done_share_bid_option_startprice', 'rides_by_bid_option_startprice_count', 'accepted_bids_option_startprice_count'],
    ['done_share_bid_option_option1', 'rides_by_bid_option_option1_count', 'accepted_bids_option_option1_count'],
    ['done_share_bid_option_option2', 'rides_by_bid_option_option2_count', 'accepted_bids_option_option2_count'],
    ['done_share_bid_option_option3', 'rides_by_bid_option_option3_count', 'accepted_bids_option_option3_count'],
    ['done_share_bid_option_other1', 'rides_by_bid_option_other1_count', 'accepted_bids_option_other1_count'],
    ['done_share_bid_option_other2', 'rides_by_bid_option_other2_count', 'accepted_bids_option_other2_count'],
    ['done_share_bid_option_other3', 'rides_by_bid_option_other3_count', 'accepted_bids_option_other3_count'],

    # times (no entries are excluded in this section)
]

# Keep every METRIC_LIST entry that does not compare equal to an entry listed above.
ESSENTIAL_METRIC_LIST = [item for item in METRIC_LIST if item not in NOT_ESSENTIAL_METRIC_LIST]


Bad Bids Experiment Specific Parameters

In [11]:
# Experiment-specific knobs. `t` is later forwarded to prepare_bid_data as t_param;
# `alpha` is not referenced by any other cell visible in this notebook section.
t, alpha = 270, 0.2

Mutable

In [12]:
# Switchback experiment under analysis and the user tag handed to the download helpers.
EXP_ID = 3366
USER_NAME = f'nusuev_sb{EXP_ID}'

# Segment used by the "Results by Segment" cells
# (presumably the name and numeric id of the same order type — verify).
ORDER_TYPE, ORDER_TYPE_ID = 'auto_econom', 1

# How many days before the experiment start the downloads reach back.
DAYS_BEFORE = 15

Immutable

In [13]:
# Per-experiment folders for cached data and saved plots; created only when missing.
DATA_ROOT_PATH = pathlib.Path(f'data/exp_id={EXP_ID}')
PLOT_ROOT_PATH = pathlib.Path(f'plots/exp_id={EXP_ID}')

for _root in (DATA_ROOT_PATH, PLOT_ROOT_PATH):
    if not _root.exists():
        _root.mkdir(parents=True, exist_ok=True)

### __Experiment Data__

#### Download

In [7]:
df_exp = download_experiment_data(exp_id=EXP_ID, user_name=USER_NAME)

# Hour-of-day and weekday name of each switch start, stored as categoricals.
df_exp['hour'] = df_exp['switch_start_dttm'].dt.hour.astype('category')
df_exp['weekday_name'] = df_exp['switch_start_dttm'].dt.day_name().astype('category')

df_exp.to_parquet(DATA_ROOT_PATH / 'df_exp.pqt')

# Experiment-level constants are read from the first row of df_exp.
EXP_START_DATE = str(df_exp['utc_start_dttm'].iloc[0].date())
EXP_STOP_DATE = str(df_exp['utc_finish_dttm'].iloc[0].date())
BEFORE_START_DATE = str(df_exp['utc_start_dttm'].iloc[0].date() - timedelta(days=DAYS_BEFORE))
CITY_ID = df_exp['city_id'].iloc[0]
EXP_NAME = df_exp['exp_name'].iloc[0]

print(
    f"""
    before_start_date: {BEFORE_START_DATE}
    exp_start_date: {EXP_START_DATE}
    exp_stop_date: {EXP_STOP_DATE}
    city_id: {CITY_ID}
    exp_name: {EXP_NAME}
    """
)

#### Check Validity

Switch Splitting. Total.

In [8]:
# Number of df_exp rows for each group_name value.
df_exp.groupby('group_name').size()

Switch Splitting. By week day and hour.

In [9]:
# Switch allocation overview built from df_exp; PLOT_ROOT_PATH is handed over as plot_root_path.
plot_switches_matrix(df_exp, plot_root_path=PLOT_ROOT_PATH, is_show=True)

### __Metrics Data__

#### Recprice

In [10]:
from datetime import datetime

# Download recprice rows from BEFORE_START_DATE to EXP_STOP_DATE for the experiment city
# and cache the raw frame as parquet.
df_recprice = download_recprice_data(
    start_date=BEFORE_START_DATE,
    stop_date=EXP_STOP_DATE,
    city_id=CITY_ID,
    user_name=USER_NAME,
    printBool=False
)
df_recprice.to_parquet(DATA_ROOT_PATH / 'df_recprice.pqt')

# Read the clock once so the printed date and time describe the same instant.
downloaded_at = datetime.now()
print('Downloaded at', downloaded_at.date(), downloaded_at.time())

In [8]:
# Reload the cached raw recprice download, run prepare_recprice_data on it
# and persist the prepared frame next to it.
df_recprice = pd.read_parquet(DATA_ROOT_PATH / 'df_recprice.pqt')
df_recprice_prepared = prepare_recprice_data(df_recprice)
df_recprice_prepared.to_parquet(DATA_ROOT_PATH / 'df_recprice_prepared.pqt')

#### Bids

In [12]:
from datetime import datetime

# Download bid rows from BEFORE_START_DATE to EXP_STOP_DATE for the experiment city
# and cache the raw frame as parquet.
df_bids = download_bid_data(
    start_date=BEFORE_START_DATE,
    stop_date=EXP_STOP_DATE,
    city_id=CITY_ID,
    user_name=USER_NAME,
    printBool=False
)

df_bids.to_parquet(DATA_ROOT_PATH / 'df_bids.pqt')

# Read the clock once so the printed date and time describe the same instant.
downloaded_at = datetime.now()
print('Downloaded at', downloaded_at.date(), downloaded_at.time())

In [None]:
# Reload the cached raw bids, prepare them (the experiment parameter `t` is passed
# as t_param) and persist the prepared frame.
df_bids = pd.read_parquet(DATA_ROOT_PATH / 'df_bids.pqt')
df_bids_prepared = prepare_bid_data(df_bids, t_param=t)
df_bids_prepared.to_parquet(DATA_ROOT_PATH / 'df_bids_prepared.pqt')

#### Orders (with recprice)

In [10]:
from datetime import datetime

# Download order rows from BEFORE_START_DATE to EXP_STOP_DATE for the experiment city
# and cache the raw frame as parquet.
df_orders = download_order_data(
    start_date=BEFORE_START_DATE,
    stop_date=EXP_STOP_DATE,
    city_id=CITY_ID,
    user_name=USER_NAME,
    printBool=False
)
df_orders.to_parquet(DATA_ROOT_PATH / 'df_orders.pqt')

# Read the clock once so the printed date and time describe the same instant.
downloaded_at = datetime.now()
print('Downloaded at', downloaded_at.date(), downloaded_at.time())

In [10]:
# Reload the cached raw orders, run prepare_order_data on them and persist the result.
df_orders = pd.read_parquet(DATA_ROOT_PATH / 'df_orders.pqt')
df_orders_prepared = prepare_order_data(df_orders)
df_orders_prepared.to_parquet(DATA_ROOT_PATH / 'df_orders_prepared.pqt')

In [None]:
# Combine prepared orders with prepared recprice data (both frames come from earlier cells).
df_orders_with_recprice = get_orders_with_recprice_df(df_orders_prepared, df_recprice_prepared)
# group_name is (over)written with recprice_group_name, so later group_name-based
# aggregations on this frame use the recprice assignment.
df_orders_with_recprice['group_name'] = df_orders_with_recprice['recprice_group_name']
df_orders_with_recprice.to_parquet(DATA_ROOT_PATH / 'df_orders_with_recprice.pqt')

### __Total Results__

In [17]:
# Drop any in-memory copies of the raw and prepared frames before re-reading the
# prepared ones from parquet. pop(name, None) tolerates names that were never
# defined without hiding unrelated errors the way a bare `except` would.
for var in ['df_recprice_prepared', 'df_orders_with_recprice', 'df_bids_prepared',
            'df_recprice', 'df_orders', 'df_bids']:
    globals().pop(var, None)

df_recprice_prepared = pd.read_parquet(DATA_ROOT_PATH / 'df_recprice_prepared.pqt')
df_orders_with_recprice = pd.read_parquet(DATA_ROOT_PATH / 'df_orders_with_recprice.pqt')
df_bids_prepared = pd.read_parquet(DATA_ROOT_PATH / 'df_bids_prepared.pqt')

In [18]:
pd.reset_option('display.max_rows')

# Absolute metrics for every (group, switch) over all order types.
df_metrics_total = calculate_absolute_metrics(
    df_recprice_prepared, df_orders_with_recprice, df_bids_prepared,
    group_cols=['group_name', 'switch_start_dttm', 'switch_finish_dttm'],
)

# Switchback results evaluated with alpha=0.05, reduced to the reporting columns.
metrics_total_tbl = get_switchback_results(df_metrics_total, alpha=0.05).loc[
    :,
    ['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant'],
]

metrics_total_tbl.head()

### __Results by Segment__

Specify Segment

In [14]:
# Drop any in-memory copies of the raw and prepared frames before re-reading the
# prepared ones from parquet. pop(name, None) tolerates names that were never
# defined without hiding unrelated errors the way a bare `except` would.
for var in ['df_recprice_prepared', 'df_orders_with_recprice', 'df_bids_prepared',
            'df_recprice', 'df_orders', 'df_bids']:
    globals().pop(var, None)

df_recprice_prepared = pd.read_parquet(DATA_ROOT_PATH / 'df_recprice_prepared.pqt')
df_orders_with_recprice = pd.read_parquet(DATA_ROOT_PATH / 'df_orders_with_recprice.pqt')
df_bids_prepared = pd.read_parquet(DATA_ROOT_PATH / 'df_bids_prepared.pqt')

# Segment masks: recprice rows are matched on the numeric order_type_id,
# orders and bids on the order_type name.
condition_on_recprice = df_recprice_prepared.order_type_id == ORDER_TYPE_ID
condition_on_orders = df_orders_with_recprice.order_type == ORDER_TYPE
condition_on_bids = df_bids_prepared.order_type == ORDER_TYPE

# From here on the three frames hold only the selected segment.
df_recprice_prepared = df_recprice_prepared[condition_on_recprice]
df_orders_with_recprice = df_orders_with_recprice[condition_on_orders]
df_bids_prepared = df_bids_prepared[condition_on_bids]

In [15]:
# df_bids_prepared = pd.read_parquet(DATA_ROOT_PATH / 'df_bids_prepared.pqt')
# condition_on_bids = df_bids_prepared.order_type == ORDER_TYPE
# df_bids_prepared = df_bids_prepared[condition_on_bids]
# df_orders_with_recprice = pd.read_parquet(DATA_ROOT_PATH / 'df_orders_with_recprice.pqt')
# condition_on_orders = df_orders_with_recprice.order_type == ORDER_TYPE
# df_orders_with_recprice = df_orders_with_recprice[condition_on_orders]


Calculate Metrics

In [16]:
pd.set_option('display.max_rows', None)

# Build each segment mask on the frame it filters. The previous version reused masks
# computed on the unfiltered frames, which only worked through pandas' silent index
# re-alignment (its warning is suppressed by warnings.filterwarnings("ignore")).
# On frames that are already restricted to the segment these masks keep every row.
df_metrics_total = calculate_absolute_metrics(
    df_recprice_prepared[df_recprice_prepared.order_type_id == ORDER_TYPE_ID].copy(),
    df_orders_with_recprice[df_orders_with_recprice.order_type == ORDER_TYPE].copy(),
    df_bids_prepared[df_bids_prepared.order_type == ORDER_TYPE].copy(),
    group_cols=['group_name', 'switch_start_dttm', 'switch_finish_dttm'],
)

# Switchback results evaluated with alpha=0.05, reduced to the reporting columns.
metrics_total_tbl = get_switchback_results(df_metrics_total, alpha=0.05)[
    ['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']
]

metrics_total_tbl.to_csv(DATA_ROOT_PATH / 'metrics_total_tbl.csv', index=False)
print(f"""csv file saved to {DATA_ROOT_PATH / 'metrics_total_tbl.csv'}""")

metrics_total_tbl

csv file saved to data/exp_id=3366/metrics_total_tbl.csv


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,cp2order,0.594602,0.596149,0.002603,0.5413194,False
1,cp2bid,0.376822,0.381153,0.011492,0.4286132,False
2,cp2start_price_bid,0.376822,0.381153,0.011492,0.4286132,False
3,cp2accept,0.424115,0.429804,0.013414,0.2874417,False
4,cp2done,0.371235,0.376337,0.013743,0.2885376,False
5,drivers_per_order,1.072087,1.080846,0.00817,0.5208532,False
6,bids_per_order,2.124729,2.101939,-0.010726,0.3997233,False
7,tenders_per_order,1.816846,1.806234,-0.005841,0.6868151,False
8,bids_per_driver,1.981863,1.944717,-0.018743,4.380078e-22,True
9,tenders_per_driver,1.694682,1.671131,-0.013897,1.064209e-06,True


Show Some Metrics

In [17]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'bid2start_price_ratio',
    'bid_option_startprice2start_price_ratio',
    'bid_option_option12start_price_ratio',
    'bid_option_option22start_price_ratio',
    'bid_option_option32start_price_ratio',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,bid2start_price_ratio,1.092489,1.078884,-0.012453,3.938098e-138,True
1,bid_option_startprice2start_price_ratio,0.998399,0.998433,3.4e-05,0.5784428,False
2,bid_option_option12start_price_ratio,1.107368,1.09023,-0.015476,0.0,True
3,bid_option_option22start_price_ratio,1.195339,1.168387,-0.022548,0.0,True
4,bid_option_option32start_price_ratio,1.290502,1.245828,-0.034618,0.0,True


In [18]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'bids_per_order',
    'order2bid',
    'bids_per_order_with_bid',
    'bid2accept',
    'order2accept',
    'order2done',
    'price_bid_currency',
    'price_bid_currency_accepted_bids',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,bids_per_order,2.124729,2.101939,-0.010726,0.399723,False
1,order2bid,0.840624,0.843389,0.00329,0.632834,False
2,bids_per_order_with_bid,2.527563,2.492253,-0.01397,0.030686,True
3,bid2accept,0.848508,0.854845,0.007468,0.008804,True
4,order2accept,0.713276,0.720967,0.010783,0.256994,False
5,order2done,0.624343,0.63128,0.011112,0.258132,False
6,price_bid_currency,4.537231,4.454398,-0.018256,0.000574,True
7,price_bid_currency_accepted_bids,3.995235,3.976942,-0.004579,0.31814,False


In [19]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'order2accept',
    'startprice_accepts_per_order',
    'option1_accepts_per_order',
    'option2_accepts_per_order',
    'option3_accepts_per_order',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,order2accept,0.713276,0.720967,0.010783,0.256994,False
1,startprice_accepts_per_order,0.639831,0.643218,0.005292,0.6134757,False
2,option1_accepts_per_order,0.044769,0.034866,-0.221194,3.772254e-94,True
3,option2_accepts_per_order,0.048564,0.050434,0.038516,0.000740113,True
4,option3_accepts_per_order,0.027672,0.041368,0.494937,8.704503e-171,True


In [20]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'accepted_bids_share',
    'startprice_AR',
    'option1_AR',
    'option2_AR',
    'option3_AR',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,accepted_bids_share,0.358096,0.366287,0.022872,3.348713e-07,True
1,startprice_AR,0.525406,0.520011,-0.010269,0.04202474,True
2,option1_AR,0.256445,0.264921,0.033053,0.005515124,True
3,option2_AR,0.139932,0.160013,0.143508,2.607075e-34,True
4,option3_AR,0.071822,0.098925,0.377372,2.309109e-147,True


In [21]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'bids_per_order',
    'bids_startprice_per_order',
    'bids_option1_per_order',
    'bids_option2_per_order',
    'bids_option3_per_order',
    # 'bids_other1_per_order', 'bids_other2_per_order', 'bids_other3_per_order'
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,bids_per_order,2.124729,2.101939,-0.010726,0.3997233,False
1,bids_startprice_per_order,1.217784,1.236932,0.015723,0.2784068,False
2,bids_option1_per_order,0.174576,0.131611,-0.246112,1.067047e-95,True
3,bids_option2_per_order,0.347053,0.315188,-0.091816,6.231012e-13,True
4,bids_option3_per_order,0.385287,0.418173,0.085355,1.303567e-10,True


In [22]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'price_bid_currency',
    'price_bid_option_startprice',
    'price_bid_option_option1',
    'price_bid_option_option2',
    'price_bid_option_option3',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,price_bid_currency,4.537231,4.454398,-0.018256,0.0005736822,True
1,price_bid_option_startprice,3.93946,3.948112,0.002196,0.660157,False
2,price_bid_option_option1,4.744576,4.833961,0.018839,0.05801121,False
3,price_bid_option_option2,4.856045,4.738989,-0.024105,1.731831e-05,True
4,price_bid_option_option3,6.045408,5.618027,-0.070695,8.554242e-36,True


In [23]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'price_bid_currency_accepted_bids',
    'price_bid_currency_accepted_bids_startprice',
    'price_bid_currency_accepted_bids_option1',
    'price_bid_currency_accepted_bids_option2',
    'price_bid_currency_accepted_bids_option3',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,price_bid_currency_accepted_bids,3.995235,3.976942,-0.004579,0.3181403,False
1,price_bid_currency_accepted_bids_startprice,3.824134,3.81174,-0.003241,0.4479965,False
2,price_bid_currency_accepted_bids_option1,4.602749,4.686,0.018087,0.01740445,True
3,price_bid_currency_accepted_bids_option2,4.678334,4.526077,-0.032545,2.159183e-08,True
4,price_bid_currency_accepted_bids_option3,5.768778,5.278288,-0.085025,8.54267e-45,True


In [24]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = [
    'etr',
    'etr_orders_without_bids',
    'etr_orders_with_bids',
    'etr_orders_with_accepted_bids',
    'etr_orders_without_accepted_bids',
    'etr_done_orders',
]

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,etr,957.108631,952.91104,-0.004386,0.449412,False
1,etr_orders_without_bids,1009.698824,1009.709557,1.1e-05,0.999158,False
2,etr_orders_with_bids,947.137894,942.364004,-0.00504,0.340592,False
3,etr_orders_with_accepted_bids,918.882313,914.448785,-0.004825,0.344833,False
4,etr_orders_without_accepted_bids,1105.397745,1106.761908,0.001234,0.851654,False
5,etr_done_orders,902.013116,896.754218,-0.00583,0.257969,False


In [25]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = ['eta', 'eta_accepted_bids', 'eta_done_bids', 'rta']

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,eta,290.646642,289.549577,-0.003775,0.26889,False
1,eta_accepted_bids,263.36134,261.821249,-0.005848,0.055984,False
2,eta_done_bids,255.216078,253.942227,-0.004991,0.094661,False
3,rta,268.677461,266.704715,-0.007342,0.115555,False


In [26]:
# Rows of metrics_total_tbl to display, in this order.
ordered_metrics = ['time_to_1st_bid_sec', 'time_1st_bid_to_accept_sec']

# An ordered categorical makes sort_values follow the list order above.
filtered = (
    metrics_total_tbl
    .loc[metrics_total_tbl['metric'].isin(ordered_metrics)]
    .assign(metric=lambda tbl: pd.Categorical(tbl['metric'], categories=ordered_metrics, ordered=True))
    .sort_values('metric')
    .reset_index(drop=True)
)
filtered


Unnamed: 0,metric,control_value,experimental_value,uplift_rel,pvalue,is_significant
0,time_to_1st_bid_sec,34.836108,34.56465,-0.007792,0.7350257,False
1,time_1st_bid_to_accept_sec,20.79916,19.905966,-0.042944,8.425031e-08,True


In [27]:
# Restore pandas' default display.max_rows (it was set to None in the "Calculate Metrics" cell).
pd.reset_option('display.max_rows')

### __Plots__

##### Distributions

Get Data

In [28]:
# Columns that identify one switch of one group.
GROUP_COLS = ['group_name', 'switch_start_dttm', 'switch_finish_dttm']

df_metrics_ratios = calculate_ratio_metrics(df_metrics_total)
# Keep the identifying columns plus one column per essential metric. The column name is
# the first element of each ESSENTIAL_METRIC_LIST entry, read directly instead of via a
# throwaway DataFrame.
df_metrics_ratios = df_metrics_ratios[GROUP_COLS + [item[0] for item in ESSENTIAL_METRIC_LIST]]

Draw figures

In [None]:
# One density per metric column, i.e. every column of df_metrics_ratios except GROUP_COLS.
plot_density_anime(
    df_metrics_ratios,
    metrics=df_metrics_ratios.columns.difference(GROUP_COLS),
    title='Metrics Distribution (switches)',
)

In [None]:
# Distinct bid_uuid count per (group_name, order_uuid), plotted as 'bids_per_order_with_bid'.
plot_density_overlay(
    (
        df_bids_prepared
        .groupby(['group_name', 'order_uuid'])['bid_uuid']
        .nunique()
        .reset_index(name='bids_per_order_with_bid')
    ),
    metric='bids_per_order_with_bid',
    title='',
    bins=10,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# bid_price_currency distribution over every row of df_bids_prepared.
plot_density_overlay(
    df_bids_prepared,
    metric='bid_price_currency',
    title='all bids',
    bins=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# bid_price_currency distribution restricted to rows with is_bid_accepted == True.
plot_density_overlay(
    df_bids_prepared.loc[df_bids_prepared['is_bid_accepted'] == True],
    metric='bid_price_currency',
    bins=20,
    title='(accepted bids)',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# bid2rec distribution over every row of df_bids_prepared.
plot_density_overlay(
    df_bids_prepared,
    metric='bid2rec',
    bins=50,
    title='all bids',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# bid2rec distribution restricted to rows with is_bid_accepted == True.
plot_density_overlay(
    df_bids_prepared.loc[df_bids_prepared['is_bid_accepted'] == True],
    metric='bid2rec',
    bins=50,
    title='accepted bids',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
def plot_ratio_scatter_line(data, metric, bins=30, 
                            height=600, width=1100, title=None):
    """Plot the per-bin share of accepted bids for the 'Control' and 'A' groups.

    Rows of ``data`` whose ``group_name`` is 'Control' or 'A' are kept. ``metric`` is
    clipped to [min of the two groups' 1st percentiles, max of their 99th percentiles]
    and cut into ``bins`` equal-width intervals. For each group and interval the share
    is ``sum(is_bid_accepted) / count(bid_uuid)``. A third trace shows the relative
    difference ``(A - Control) / Control`` per interval.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``metric``, ``group_name``, ``is_bid_accepted`` and ``bid_uuid``.
    metric : str
        Column to bin on; also used as the x-axis title.
    bins : int
        Number of equal-width intervals.
    height, width : int
        Figure size passed to the layout.
    title : str, optional
        Figure title. Defaults to "Accepted / Total (per bin)".

    Returns
    -------
    plotly.graph_objects.Figure
    """
    group_colors = {'Control': 'steelblue', 'A': 'indianred'}

    # 1. Keep only the two compared groups and the columns used below
    subset = data.loc[data['group_name'].isin(list(group_colors)),
                      [metric, 'group_name', 'is_bid_accepted', 'bid_uuid']].copy()

    # 2. Clip the metric to a range shared by both groups
    control_values = subset.loc[subset['group_name'] == 'Control', metric]
    test_values = subset.loc[subset['group_name'] == 'A', metric]
    lower = min(control_values.quantile(0.01), test_values.quantile(0.01))
    upper = max(control_values.quantile(0.99), test_values.quantile(0.99))
    subset[metric] = subset[metric].clip(lower=lower, upper=upper)

    # 3. Equal-width bins over the clipped range
    bin_edges = np.linspace(lower, upper, bins + 1)
    subset['metric_bin'] = pd.cut(subset[metric], bins=bin_edges, include_lowest=True)

    # 4. Accepted / total per (group, bin). observed=False is stated explicitly so every
    #    bin is emitted for every group; the Rel Diff trace pairs the rows by position.
    grouped = (
        subset.groupby(['group_name', 'metric_bin'], observed=False)
        .agg(
            numerator=('is_bid_accepted', 'sum'),
            denominator=('bid_uuid', 'count')
        )
        .reset_index()
    )
    grouped['ratio'] = grouped['numerator'] / grouped['denominator']

    # 5. X coordinate of a bin is its left edge
    grouped['bin_left'] = grouped['metric_bin'].apply(lambda x: x.left)

    # 6. One line per group
    fig = go.Figure()
    for group, color in group_colors.items():
        group_data = grouped[grouped['group_name'] == group]
        fig.add_trace(go.Scatter(
            x=group_data['bin_left'],
            y=group_data['ratio'],
            mode='lines+markers',
            name=group,
            line=dict(color=color),
            marker=dict(size=8)
        ))

    # 7. Relative difference of the shares, A versus Control
    control_rows = grouped[grouped['group_name'] == 'Control']
    control_ratio = control_rows['ratio'].reset_index(drop=True)
    test_ratio = grouped[grouped['group_name'] == 'A']['ratio'].reset_index(drop=True)
    fig.add_trace(go.Scatter(
            x=control_rows['bin_left'],
            y=(test_ratio - control_ratio) / control_ratio,
            mode='lines+markers',
            name='Rel Diff',
            line=dict(color='mediumseagreen'),
            marker=dict(size=8)
        ))

    fig.update_layout(
        # The title argument used to be ignored; honour it when one is given.
        title=title if title is not None else "Accepted / Total (per bin)",
        xaxis_title=f"{metric}",
        yaxis_title="Share",
        template="simple_white",
        height=height,
        width=width,
    )

    return fig

plot_ratio_scatter_line(df_bids_prepared, metric='bid2rec', bins=20)


In [None]:
# bidMPH2recMPH distribution over every row of df_bids_prepared.
plot_density_overlay(
    df_bids_prepared,
    metric='bidMPH2recMPH',
    bins=20,
    title='all bids',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# bidMPH2recMPH distribution restricted to rows with is_bid_accepted == True.
plot_density_overlay(
    df_bids_prepared.loc[df_bids_prepared['is_bid_accepted'] == True],
    metric='bidMPH2recMPH',
    bins=20,
    title='accepted bids',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# Accepted share per bidMPH2recMPH bin, Control vs A.
plot_ratio_scatter_line(
    df_bids_prepared,
    metric='bidMPH2recMPH',
    bins=20,
)


In [None]:
# duration_sec distribution over every row of df_orders_with_recprice.
plot_density_overlay(
    df_orders_with_recprice,
    metric='duration_sec',
    bins=100,
    title='all orders',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# duration_sec distribution for rows with is_order_with_tender == True.
plot_density_overlay(
    df_orders_with_recprice.loc[df_orders_with_recprice['is_order_with_tender'] == True],
    metric='duration_sec',
    bins=100,
    title='orders with tender',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# duration_sec distribution for rows with is_order_with_tender == False.
plot_density_overlay(
    df_orders_with_recprice.loc[df_orders_with_recprice['is_order_with_tender'] == False],
    metric='duration_sec',
    bins=100,
    title='orders with no tender',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# duration_sec distribution for rows that have a tender and were accepted.
plot_density_overlay(
    df_orders_with_recprice.loc[
        (df_orders_with_recprice['is_order_with_tender'] == True)
        & (df_orders_with_recprice['is_order_accepted'] == True)
    ],
    metric='duration_sec',
    bins=50,
    title='orders with tender accepted',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# duration_sec distribution for rows that have a tender but were not accepted.
plot_density_overlay(
    df_orders_with_recprice.loc[
        (df_orders_with_recprice['is_order_with_tender'] == True)
        & (df_orders_with_recprice['is_order_accepted'] == False)
    ],
    metric='duration_sec',
    bins=50,
    title='orders with tender NOT accepted',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# eta distribution over every row of df_bids_prepared.
plot_density_overlay(
    df_bids_prepared,
    metric='eta',
    bins=50,
    title='all bids',
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

##### Essential Heatmap

In [None]:
from src.metrics import metric_price_bid_done_currency_sum, metric_orders_count
from src.metrics import metric_accepted_bids_option_startprice_count, metric_accepted_bids_option_option1_count
from src.metrics import metric_accepted_bids_option_option2_count, metric_accepted_bids_option_option3_count
from src.metrics import metric_bids_count, metric_bids_option_option3_count, metric_eta_sum, metric_accepted_bids_count, metric_bids_bid_price_currency_sum, metric_accepted_bids_bid_price_currency_sum


# Orders per group: the denominator shared by every heatmap in this cell.
# It used to be recomputed twice per plot_heatmap call.
orders_by_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_per_group = dict(zip(orders_by_group['group_name'], orders_by_group['orders_count']))


def _accepts_per_order_heatmap(metric, numerator_func, **extra):
    """Call plot_heatmap on df_bids_prepared with orders per group as denom_values_dict.

    ``extra`` is forwarded unchanged to plot_heatmap (used here for ``num_bins``).
    Each call receives its own copy of the denominator dict.
    """
    return plot_heatmap(df_bids_prepared,
                        metric=metric,
                        numerator_func=numerator_func,
                        denominator_func=None,
                        denom_values_dict=dict(orders_per_group),
                        min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
                        **extra)


# The colour-scale overrides (zmin1/zmax1/zmin2/zmax2) stay at plot_heatmap's defaults.
# The first heatmap also keeps the default num_bins; the per-option ones use 20 bins.
_accepts_per_order_heatmap('Accepts per ordersTOTAL', metric_accepted_bids_count)

_accepts_per_order_heatmap('Accepts_SP per ordersTOTAL',
                           metric_accepted_bids_option_startprice_count, num_bins=20)

_accepts_per_order_heatmap('Accepts_Op1 per ordersTOTAL',
                           metric_accepted_bids_option_option1_count, num_bins=20)

_accepts_per_order_heatmap('Accepts_Op2 per ordersTOTAL',
                           metric_accepted_bids_option_option2_count, num_bins=20)

# Kept as the final expression of the cell, as in the original layout.
_accepts_per_order_heatmap('Accepts_Op3 per ordersTOTAL',
                           metric_accepted_bids_option_option3_count, num_bins=20)

In [None]:
from src.metrics import metric_bids_option_startprice_count, metric_bids_option_option1_count
from src.metrics import metric_bids_option_option2_count, metric_bids_option_option3_count

# "Bids ... per ordersTOTAL" heatmaps: one plot per numerator function, each
# normalised by the number of orders in the experiment group.
# The per-group order counts are computed once here instead of calling
# metric_orders_count twice for every single plot.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'],
                                 orders_per_group['orders_count']))

# Metric names are kept byte-for-byte: they end up in the saved picture names.
for heatmap_metric, heatmap_numerator_func in [
    ('bids per orderTOTAL', metric_bids_count),
    ('Bids_SP per ordersTOTAL', metric_bids_option_startprice_count),
    ('Bids_Op1 per ordersTOTAL', metric_bids_option_option1_count),
    ('Bids_Op2 per ordersTOTAL', metric_bids_option_option2_count),
    ('Bids_Op3 per ordersTOTAL', metric_bids_option_option3_count),
]:
    plot_heatmap(df_bids_prepared,
                 metric=heatmap_metric,
                 numerator_func=heatmap_numerator_func,
                 denominator_func=None,
                 # a fresh dict per call, exactly as each original call built its own
                 denom_values_dict=dict(orders_count_by_group),
                 min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
                 # zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
                 num_bins=20
                 )

In [None]:
from src.metrics import metric_price_bid_done_option_startprice_currency_sum, metric_price_bid_done_option_option1_currency_sum
from src.metrics import metric_price_bid_done_option_option2_currency_sum, metric_price_bid_done_option_option3_currency_sum
from src.metrics import metric_price_bid_done_currency_sum, metric_orders_count

# "GMV ... per orders" heatmaps: one plot per numerator function, each
# normalised by the number of orders in the experiment group.
# The per-group order counts are computed once here instead of calling
# metric_orders_count twice for every single plot.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'],
                                 orders_per_group['orders_count']))

# Metric names are kept byte-for-byte: they end up in the saved picture names.
for heatmap_metric, heatmap_numerator_func in [
    ('GMV per ordersTotal', metric_price_bid_done_currency_sum),
    ('GMV_SP per ordersTOTAL', metric_price_bid_done_option_startprice_currency_sum),
    ('GMV_Op1 per ordersTOTAL', metric_price_bid_done_option_option1_currency_sum),
    ('GMV_Op2 per ordersTOTAL', metric_price_bid_done_option_option2_currency_sum),
    ('GMV_Op3 per ordersTOTAL', metric_price_bid_done_option_option3_currency_sum),
]:
    plot_heatmap(df_bids_prepared,
                 metric=heatmap_metric,
                 numerator_func=heatmap_numerator_func,
                 denominator_func=None,
                 # a fresh dict per call, exactly as each original call built its own
                 denom_values_dict=dict(orders_count_by_group),
                 min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
                 # zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
                 num_bins=20
                 )

In [None]:
def metric_total_bids_count(df, group_cols):
    """Return one row per group carrying the row count of the whole frame.

    Args:
        df: frame with one row per bid.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `total_bids_count`; the value is
        `len(df)` and therefore identical in every row.
    """
    # NOTE(review): the same function is defined again in a later cell of this notebook.
    group_keys = df.groupby(group_cols).size().reset_index(name='temp')
    with_total = group_keys.assign(total_bids_count=len(df))
    return with_total[group_cols + ['total_bids_count']]

# `bids_dist`: metric_bids_count divided by the frame-wide bid total.
plot_heatmap(
    df_bids_prepared,
    metric='bids_dist',
    numerator_func=metric_bids_count,
    denominator_func=metric_total_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [7]:
from src.sanitize_results import sanitize_heatmaps_folder

# Run the picture clean-up for this experiment and show the returned summary as
# the cell output.
# NOTE(review): the selection/move rules live in src.sanitize_results and are not
# visible here — confirm there before relying on which files are kept.
result = sanitize_heatmaps_folder(PLOT_ROOT_PATH, EXP_ID)
result

Found 24 essential pictures in the list
Moved from distributions: 3366_bidMPH2recMPH_all_bids.png
Moved from distributions: 3366_bid2rec_all_bids.png
Moved from distributions: 3366_bid_price_currency_all_bids.png
Moved from distributions: 3366_bids_per_order_with_bid.png
Moved from heatmaps: 3366_Accepts_SP per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Accepts_Op1 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Accepts_Op2 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Accepts_Op3 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Accepts per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Bids_SP per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Bids_Op1 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Bids_Op2 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_Bids_Op3 per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_bids per orderTOTAL_Rel_Diff.png
Moved from heatmaps: 3366_GMV_SP per ordersTOTAL_Rel_Diff.png
Moved from heatmaps: 3

{'moved_from_heatmaps': ['3366_Accepts_SP per ordersTOTAL_Rel_Diff.png',
  '3366_Accepts_Op1 per ordersTOTAL_Rel_Diff.png',
  '3366_Accepts_Op2 per ordersTOTAL_Rel_Diff.png',
  '3366_Accepts_Op3 per ordersTOTAL_Rel_Diff.png',
  '3366_Accepts per ordersTOTAL_Rel_Diff.png',
  '3366_Bids_SP per ordersTOTAL_Rel_Diff.png',
  '3366_Bids_Op1 per ordersTOTAL_Rel_Diff.png',
  '3366_Bids_Op2 per ordersTOTAL_Rel_Diff.png',
  '3366_Bids_Op3 per ordersTOTAL_Rel_Diff.png',
  '3366_bids per orderTOTAL_Rel_Diff.png',
  '3366_GMV_SP per ordersTOTAL_Rel_Diff.png',
  '3366_GMV_Op1 per ordersTOTAL_Rel_Diff.png',
  '3366_GMV_Op2 per ordersTOTAL_Rel_Diff.png',
  '3366_GMV_Op3 per ordersTOTAL_Rel_Diff.png',
  '3366_GMV per ordersTotal_Rel_Diff.png',
  '3366_bids_dist_A.png',
  '3366_bids_dist_Control.png',
  '3366_bids_dist_Rel_Diff.png'],
 'moved_from_distributions': ['3366_bidMPH2recMPH_all_bids.png',
  '3366_bid2rec_all_bids.png',
  '3366_bid_price_currency_all_bids.png',
  '3366_bids_per_order_with_bid.p

##### Heatmap of the Algorithm

Get Data and Check Validity

In [50]:
# Grouping keys passed to the metric helpers and to get_switchback_results below:
# experiment group plus the start/finish timestamps of the switch window.
GROUP_COLS = ['group_name', 'switch_start_dttm', 'switch_finish_dttm']

In [51]:
# Prepare data
# add_algo_name_new is expected to add the `algo_name_new` column that the metric
# helpers below read — TODO confirm in src.prepare.
# NOTE(review): df_bids_prepared is overwritten with the function's own output, so
# re-running this cell feeds the already processed frame back in.

# condition_on_bids = df_bids_prepared.order_type == ORDER_TYPE
# df_bids_filtered = df_bids_prepared[condition_on_bids].copy()

df_bids_prepared = add_algo_name_new(df_bids_prepared, t, alpha)
# Echo the parameters used for the labelling.
print(f"""
t: {t}sec, 
alpha: {alpha} \n
""")

In [52]:
# Build three different row counts per experiment group
total_counts = df_bids_prepared.groupby('group_name').size()
bidmph_counts = (
    df_bids_prepared
    .loc[df_bids_prepared['bidding_algorithm_name'] == 'bid_mph']
    .groupby('group_name')
    .size()
)
algo_bidmph_counts = (
    df_bids_prepared
    .loc[df_bids_prepared['algo_name_new'] == 'algo_bidmph']
    .groupby('group_name')
    .size()
)

# Combine the counts into one frame, replace NaN with zero for groups that have
# no matching rows, and sort by the total number of rows
result = (
    pd.DataFrame({
        'Всего строк': total_counts,
        'Строк bidmph': bidmph_counts,
        'Строк algo_bidmph': algo_bidmph_counts,
        'Доля algo_bidmph': (algo_bidmph_counts / total_counts).round(2)
    })
    .fillna(0)
    .sort_values('Всего строк', ascending=False)
)

print("Распределение по group_name:")
print(result)

# # Add a totals row
# print("\nИтого:")
# print(result.sum())

# Drop the temporaries so they do not linger in the kernel namespace
del total_counts, bidmph_counts, algo_bidmph_counts, result

In [53]:
from src.metrics import metric_bids_count, metric_bids_option_option3_count, metric_eta_sum, metric_accepted_bids_count, metric_bids_bid_price_currency_sum, metric_accepted_bids_bid_price_currency_sum

def metric_algo_bidmph_bids_count(df, group_cols):
    """Count distinct bids whose `algo_name_new` contains 'algo_bidmph'.

    Args:
        df: frame with one row per bid; needs `algo_name_new` and `bid_uuid`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `algo_bidmph_bids_count` (number of
        unique `bid_uuid` per group). Groups without matching bids are absent.
    """
    # na=False: rows with a missing algo_name_new count as "no match" instead of
    # putting NaN into the boolean mask, which makes the row selection fail.
    # regex=False: the pattern is a plain substring.
    is_bidmph = df['algo_name_new'].str.contains('algo_bidmph', na=False, regex=False)
    return (df[is_bidmph]
            .groupby(group_cols)
            .agg(algo_bidmph_bids_count=('bid_uuid', 'nunique'))
            .reset_index())

def metric_algo_bidmph_accepted_bids_count(df, group_cols):
    """Count distinct accepted bids whose `algo_name_new` contains 'algo_bidmph'.

    Args:
        df: frame with one row per bid; needs `is_bid_accepted`,
            `algo_name_new` and `bid_uuid`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `algo_bidmph_accepted_bids_count`
        (number of unique `bid_uuid` per group). Groups without matching bids
        are absent.
    """
    # na=False: a missing algo_name_new counts as "no match" rather than
    # injecting NaN into the mask. Assumes is_bid_accepted is boolean — TODO confirm.
    is_bidmph = df['algo_name_new'].str.contains('algo_bidmph', na=False, regex=False)
    return (df[(df['is_bid_accepted']) & (is_bidmph)]
            .groupby(group_cols)
            .agg(algo_bidmph_accepted_bids_count=('bid_uuid', 'nunique'))
            .reset_index())

def metric_algo_default_bids_count(df, group_cols):
    """Count distinct bids whose `algo_name_new` contains 'algo_default'.

    Args:
        df: frame with one row per bid; needs `algo_name_new` and `bid_uuid`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `algo_default_bids_count` (number of
        unique `bid_uuid` per group). Groups without matching bids are absent.
    """
    # na=False: rows with a missing algo_name_new count as "no match" instead of
    # putting NaN into the boolean mask, which makes the row selection fail.
    is_default = df['algo_name_new'].str.contains('algo_default', na=False, regex=False)
    return (df[is_default]
            .groupby(group_cols)
            .agg(algo_default_bids_count=('bid_uuid', 'nunique'))
            .reset_index())

def metric_algo_default_accepted_bids_count(df, group_cols):
    """Count distinct accepted bids whose `algo_name_new` contains 'algo_default'.

    Args:
        df: frame with one row per bid; needs `is_bid_accepted`,
            `algo_name_new` and `bid_uuid`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `algo_default_accepted_bids_count`
        (number of unique `bid_uuid` per group). Groups without matching bids
        are absent.
    """
    # na=False: a missing algo_name_new counts as "no match" rather than
    # injecting NaN into the mask. Assumes is_bid_accepted is boolean — TODO confirm.
    is_default = df['algo_name_new'].str.contains('algo_default', na=False, regex=False)
    return (df[(df['is_bid_accepted']) & (is_default)]
            .groupby(group_cols)
            .agg(algo_default_accepted_bids_count=('bid_uuid', 'nunique'))
            .reset_index())

def metric_ones(df, group_cols):
    """Return a constant 1 for every group present in `df`.

    Used as a denominator so that a ratio plot shows the raw numerator.

    Args:
        df: any frame containing `group_cols`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus a column `ones` equal to 1.
    """
    group_keys = df.groupby(group_cols).size().reset_index(name='temp')
    return group_keys.assign(ones=1)[group_cols + ['ones']]

def metric_total_bids_count(df, group_cols):
    """Return one row per group carrying the row count of the whole frame.

    Args:
        df: frame with one row per bid.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `total_bids_count`; the value is
        `len(df)` and therefore identical in every row.
    """
    # NOTE(review): an identical definition already exists in an earlier cell of this notebook.
    group_keys = df.groupby(group_cols).size().reset_index(name='temp')
    with_total = group_keys.assign(total_bids_count=len(df))
    return with_total[group_cols + ['total_bids_count']]

def metric_total_accepts_count(df, group_cols):
    """Return one row per group carrying the frame-wide number of accepted bids.

    Args:
        df: frame with one row per bid; needs `is_bid_accepted`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `total_accepts_count`; the value is the
        number of rows where `is_bid_accepted == True` in the whole frame and is
        identical in every row.
    """
    accepted_rows = df.loc[df['is_bid_accepted'] == True]
    group_keys = df.groupby(group_cols).size().reset_index(name='temp')
    with_total = group_keys.assign(total_accepts_count=len(accepted_rows))
    return with_total[group_cols + ['total_accepts_count']]

def metric_algo_bidmph_price_diff_sum(df, group_cols):
    """Sum `price_diff` over bids whose `algo_name_new` contains 'algo_bidmph'.

    Args:
        df: frame with one row per bid; needs `algo_name_new` and `price_diff`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `price_diff_sum`. Groups without
        matching bids are absent.
    """
    # na=False: rows with a missing algo_name_new count as "no match" instead of
    # putting NaN into the boolean mask, which makes the row selection fail.
    is_bidmph = df['algo_name_new'].str.contains('algo_bidmph', na=False, regex=False)
    return (df[is_bidmph]
            .groupby(group_cols)
            .agg(price_diff_sum=('price_diff', 'sum'))
            .reset_index())

def metric_bidmph2recmph_sum(df, group_cols):
    """Sum the `bidMPH2recMPH` column per group.

    Args:
        df: frame with one row per bid; needs `bidMPH2recMPH`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus the per-group sum.
    """
    # NOTE(review): the output column is called `price_diff_sum` although it sums
    # bidMPH2recMPH; kept as is because downstream code may read that name.
    per_group = df.groupby(group_cols).agg(
        price_diff_sum=pd.NamedAgg(column='bidMPH2recMPH', aggfunc='sum')
    )
    return per_group.reset_index()

def metric_bids_bidmph_count(df, group_cols):
    """Count distinct bids whose `algo_name_new` contains 'algo_bidmph'.

    Same selection and aggregation as metric_algo_bidmph_bids_count; only the
    output column name differs.

    Args:
        df: frame with one row per bid; needs `algo_name_new` and `bid_uuid`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `bids_bidmph_count` (number of unique
        `bid_uuid` per group). Groups without matching bids are absent.
    """
    # na=False: rows with a missing algo_name_new count as "no match" instead of
    # putting NaN into the boolean mask, which makes the row selection fail.
    is_bidmph = df['algo_name_new'].str.contains('algo_bidmph', na=False, regex=False)
    return (df[is_bidmph]
            .groupby(group_cols)
            .agg(bids_bidmph_count=('bid_uuid', 'nunique'))
            .reset_index())

def metric_total_bids_bidmph_count(df, group_cols):
    """Return one row per group carrying the frame-wide number of algo_bidmph rows.

    Args:
        df: frame with one row per bid; needs `algo_name_new`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `total_bids_bidmph_count`; the value is
        the number of rows of the whole frame whose `algo_name_new` contains
        'algo_bidmph' and is identical in every row.
    """
    # na=False: rows with a missing algo_name_new count as "no match" instead of
    # putting NaN into the boolean mask, which makes the row selection fail.
    is_bidmph = df['algo_name_new'].str.contains('algo_bidmph', na=False, regex=False)
    grouped = df.groupby(group_cols).size().reset_index(name='temp')
    grouped['total_bids_bidmph_count'] = int(is_bidmph.sum())
    return grouped[group_cols + ['total_bids_bidmph_count']]

def metric_available_prices_max_perc_sum(df, group_cols):
    """Sum the `price_diff` column per group (all rows, no algorithm filter).

    Args:
        df: frame with one row per bid; needs `price_diff`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `price_diff_sum`.
    """
    per_group = df.groupby(group_cols).agg(
        price_diff_sum=pd.NamedAgg(column='price_diff', aggfunc='sum')
    )
    return per_group.reset_index()

def metric_available_prices_unique_sum(df, group_cols):
    """Sum the `unique_available_prices` column per group.

    Args:
        df: frame with one row per bid; needs `unique_available_prices`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus the per-group sum.
    """
    # NOTE(review): the output column is called `price_diff_sum` although it sums
    # unique_available_prices; kept as is because downstream code may read that name.
    per_group = df.groupby(group_cols).agg(
        price_diff_sum=pd.NamedAgg(column='unique_available_prices', aggfunc='sum')
    )
    return per_group.reset_index()

def metric_algo_bidmph_count(df, group_cols):
    """Count rows whose `algo_name_new` equals 'algo_bidmph' exactly.

    Unlike the `str.contains`-based helpers, this is an exact match and counts
    rows rather than distinct `bid_uuid` values.

    Args:
        df: frame with one row per bid; needs `algo_name_new`.
        group_cols: list of column names to group by.

    Returns:
        DataFrame with `group_cols` plus `algo_bidmph_count`. Groups without
        matching rows are absent.
    """
    bidmph_rows = df.loc[df['algo_name_new'].eq('algo_bidmph')]
    per_group = bidmph_rows.groupby(group_cols).agg(
        algo_bidmph_count=('algo_name_new', 'count')
    )
    return per_group.reset_index()

In [54]:
# Switchback test on all bids: per-switch counts are merged into one frame and
# handed to get_switchback_results as (metric name, numerator, denominator) triples.
# NOTE(review): the merge starts from the algo_bidmph counts with how='left', so
# switch windows without any algo_bidmph rows are not part of `tmp` — confirm
# this is intended for `badbids_share`.
tmp = metric_algo_bidmph_count(df_bids_prepared.copy(), GROUP_COLS)
for extra_metric_func in (metric_bids_count,
                          metric_algo_default_bids_count,
                          metric_algo_bidmph_bids_count):
    tmp = tmp.merge(extra_metric_func(df_bids_prepared.copy(), GROUP_COLS),
                    on=GROUP_COLS, how='left')

tmp = get_switchback_results(
    tmp,
    alpha=0.05,
    metric_list=[
        ['badbids_share', 'algo_bidmph_count', 'bids_count'],
        ['algo_bidmph_per_algo_default', 'algo_bidmph_bids_count', 'algo_default_bids_count'],
        ['test', 'algo_bidmph_bids_count', 'algo_bidmph_count'],
    ],
)
tmp = tmp[['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']]
tmp

In [55]:
# Same `badbids_share` test restricted to bids with eta >= 300.
# NOTE(review): the left merge starts from the algo_bidmph counts, so switch
# windows without any algo_bidmph rows drop out — confirm this is intended.
condition_1 = df_bids_prepared['eta'] >= 300

tmp = metric_algo_bidmph_count(df_bids_prepared[condition_1].copy(), GROUP_COLS)
tmp = tmp.merge(
    metric_bids_count(df_bids_prepared[condition_1].copy(), GROUP_COLS),
    on=GROUP_COLS,
    how='left',
)
tmp = get_switchback_results(
    tmp,
    alpha=0.05,
    metric_list=[['badbids_share', 'algo_bidmph_count', 'bids_count']],
)
tmp = tmp[['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']]
tmp

In [56]:
# Switchback test restricted to bids with eta < 300.
# NOTE(review): the left merge starts from the algo_bidmph counts, so switch
# windows without any algo_bidmph rows drop out — confirm this is intended.
condition_2 = df_bids_prepared['eta'] < 300

tmp = metric_algo_bidmph_count(df_bids_prepared[condition_2].copy(), GROUP_COLS)
for extra_metric_func in (metric_bids_count,
                          metric_algo_default_bids_count,
                          metric_algo_bidmph_bids_count):
    tmp = tmp.merge(extra_metric_func(df_bids_prepared[condition_2].copy(), GROUP_COLS),
                    on=GROUP_COLS, how='left')

tmp = get_switchback_results(
    tmp,
    alpha=0.05,
    metric_list=[
        ['badbids_share', 'algo_bidmph_count', 'bids_count'],
        ['algo_bidmph_per_algo_default', 'algo_bidmph_bids_count', 'algo_default_bids_count'],
    ],
)
tmp = tmp[['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']]
tmp

In [57]:
# Same `badbids_share` test restricted to bids with etr < 1000.
# NOTE(review): the left merge starts from the algo_bidmph counts, so switch
# windows without any algo_bidmph rows drop out — confirm this is intended.
condition_3 = df_bids_prepared['etr'] < 1000

tmp = metric_algo_bidmph_count(df_bids_prepared[condition_3].copy(), GROUP_COLS)
tmp = tmp.merge(
    metric_bids_count(df_bids_prepared[condition_3].copy(), GROUP_COLS),
    on=GROUP_COLS,
    how='left',
)
tmp = get_switchback_results(
    tmp,
    alpha=0.05,
    metric_list=[['badbids_share', 'algo_bidmph_count', 'bids_count']],
)
tmp = tmp[['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']]
tmp

In [58]:
# Same `badbids_share` test restricted to bids with etr >= 1000.
# NOTE(review): the left merge starts from the algo_bidmph counts, so switch
# windows without any algo_bidmph rows drop out — confirm this is intended.
condition_4 = df_bids_prepared['etr'] >= 1000

tmp = metric_algo_bidmph_count(df_bids_prepared[condition_4].copy(), GROUP_COLS)
tmp = tmp.merge(
    metric_bids_count(df_bids_prepared[condition_4].copy(), GROUP_COLS),
    on=GROUP_COLS,
    how='left',
)
tmp = get_switchback_results(
    tmp,
    alpha=0.05,
    metric_list=[['badbids_share', 'algo_bidmph_count', 'bids_count']],
)
tmp = tmp[['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']]
tmp

Heatmap

In [59]:
# `bids_dist`: metric_bids_count divided by the frame-wide bid total.
plot_heatmap(
    df_bids_prepared,
    metric='bids_dist',
    numerator_func=metric_bids_count,
    denominator_func=metric_total_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [60]:
# `accepts_dist`: accepted-bid count divided by the frame-wide accepted total.
plot_heatmap(
    df_bids_prepared,
    metric='accepts_dist',
    numerator_func=metric_accepted_bids_count,
    denominator_func=metric_total_accepts_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [61]:
# `price_bid_currency`: bid price sum divided by the bid count.
plot_heatmap(
    df_bids_prepared,
    metric='price_bid_currency',
    numerator_func=metric_bids_bid_price_currency_sum,
    denominator_func=metric_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [62]:
# `price_bid_currency_accepted_bids`: accepted bid price sum divided by the accepted bid count.
plot_heatmap(
    df_bids_prepared,
    metric='price_bid_currency_accepted_bids',
    numerator_func=metric_accepted_bids_bid_price_currency_sum,
    denominator_func=metric_accepted_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `badbids_share`: algo_bidmph bid count divided by the bid count,
# with a fixed colour range passed via zmin2/zmax2.
plot_heatmap(
    df_bids_prepared,
    metric='badbids_share',
    numerator_func=metric_algo_bidmph_bids_count,
    denominator_func=metric_bids_count,
    min_samples=20,
    zmin2=-0.15,
    zmax2=0.15,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph_per_algo_default`: algo_bidmph bid count divided by the algo_default bid count.
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph_per_algo_default',
    numerator_func=metric_algo_bidmph_bids_count,
    denominator_func=metric_algo_default_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `bidmph2recmph`: bidMPH2recMPH sum divided by the bid count; zmid1 is set to 1 + alpha.
plot_heatmap(
    df_bids_prepared,
    metric='bidmph2recmph',
    numerator_func=metric_bidmph2recmph_sum,
    denominator_func=metric_bids_count,
    min_samples=20,
    zmid1=1.0+alpha,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `bids_bidmph_dist`: algo_bidmph bid count divided by the frame-wide algo_bidmph total.
plot_heatmap(
    df_bids_prepared,
    metric='bids_bidmph_dist',
    numerator_func=metric_bids_bidmph_count,
    denominator_func=metric_total_bids_bidmph_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `bids_option_option3_dist`: option-3 bid count divided by the frame-wide bid total.
plot_heatmap(
    df_bids_prepared,
    metric='bids_option_option3_dist',
    numerator_func=metric_bids_option_option3_count,
    denominator_func=metric_total_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `eta_avg`: ETA sum divided by the number of bids in the same group, i.e. an
# average. A frame-wide total such as metric_total_bids_count is constant
# (len(df)) and would give each cell's contribution to the overall mean, not
# the cell's own average.
# NOTE(review): presumes metric_bids_count returns the per-group bid count, as in
# the other ratio heatmaps — confirm in src.metrics.
plot_heatmap(df_bids_prepared, 
             metric='eta_avg', 
             numerator_func=metric_eta_sum,
             denominator_func=metric_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
# `accepted_share`: accepted bid count divided by the bid count.
plot_heatmap(
    df_bids_prepared,
    metric='accepted_share',
    numerator_func=metric_accepted_bids_count,
    denominator_func=metric_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph2accept`: accepted algo_bidmph bid count divided by the algo_bidmph bid count.
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph2accept',
    numerator_func=metric_algo_bidmph_accepted_bids_count,
    denominator_func=metric_algo_bidmph_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph_AR`: accepted algo_bidmph bid count divided by the accepted algo_default bid count.
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph_AR',
    numerator_func=metric_algo_bidmph_accepted_bids_count,
    denominator_func=metric_algo_default_accepted_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph_count`: raw algo_bidmph bid count (the denominator is the constant 1).
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph_count',
    numerator_func=metric_algo_bidmph_bids_count,
    denominator_func=metric_ones,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph_accepted_count`: raw accepted algo_bidmph bid count (the denominator is the constant 1).
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph_accepted_count',
    numerator_func=metric_algo_bidmph_accepted_bids_count,
    denominator_func=metric_ones,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

In [None]:
# `algo_bidmph_available_prices_max_perc`: price_diff sum over algo_bidmph bids
# divided by the algo_bidmph bid count, with a fixed zmin1/zmax1 range.
plot_heatmap(
    df_bids_prepared,
    metric='algo_bidmph_available_prices_max_perc',
    numerator_func=metric_algo_bidmph_price_diff_sum,
    denominator_func=metric_algo_bidmph_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    # alternative range: zmin1=0.32, zmax1=0.36,
    zmin1=0.15,
    zmax1=0.25,
)

In [None]:
# unique_available_prices sum divided by the bid count.
# The metric name is now unique: this cell used to share the name
# 'algo_bidmph_accepted_count' with another heatmap, so the two plots were
# labelled identically and (the saved picture names appear to be derived from
# the metric name) would overwrite each other on disk.
plot_heatmap(df_bids_prepared, 
             metric='unique_available_prices_per_bid', 
             numerator_func=metric_available_prices_unique_sum,
             denominator_func=metric_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
# `accepted_bids_share`: accepted bid count divided by the bid count.
plot_heatmap(
    df_bids_prepared,
    metric='accepted_bids_share',
    numerator_func=metric_accepted_bids_count,
    denominator_func=metric_bids_count,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
)

##### vs ETR

In [42]:
# Boolean masks selecting the analysed order type in each of the three frames.
condition_on_recprice = df_recprice_prepared['order_type_id'] == ORDER_TYPE_ID
condition_on_orders = df_orders_with_recprice['order_type'] == ORDER_TYPE
condition_on_bids = df_bids_prepared['order_type'] == ORDER_TYPE

In [43]:
def create_duration_bin(df):
    """Add fixed 5-minute duration bins to `df` (in place) and return it.

    Writes two columns:
      * `duration_sec`: `duration_in_min` converted to seconds;
      * `duration_bin`: left edge (in seconds, as a string label) of the
        half-open 300-second interval the row falls into.

    Bin edges stop below the 99th percentile of `duration_sec` rounded up to a
    whole minute; rows at or beyond the last edge get NaN.

    NOTE(review): a later cell redefines `create_duration_bin` with a different
    signature, which shadows this version.
    """
    bin_width_sec = 60 * 5
    df['duration_sec'] = df['duration_in_min'] * 60

    p99_sec = df['duration_sec'].quantile(0.99)
    duration_max = int(np.ceil(p99_sec / 60.0)) * 60

    edges = np.arange(0, duration_max, bin_width_sec)
    edge_labels = [str(left) for left in range(0, duration_max - bin_width_sec, bin_width_sec)]

    df['duration_bin'] = pd.cut(df['duration_sec'], bins=edges, labels=edge_labels, right=False)
    return df

In [44]:
def create_duration_bin(df, reference_df, num_bins=30):
    """Bin `df` by duration using quantile edges taken from `reference_df`.

    Args:
        df: frame to bin; needs `duration_in_min`. It is modified in place:
            `duration_sec` and `duration_bin` are (over)written.
        reference_df: frame whose duration distribution defines the bin edges;
            needs `duration_sec` or `duration_in_min`. It is never modified
            (a copy is made when `duration_sec` has to be derived).
        num_bins: number of equally spaced quantile levels; duplicated edges
            are merged, so fewer bins may result.

    Returns:
        The same `df` object. `duration_bin` is a categorical labelled with the
        left edge (seconds) of each half-open interval [left, right). The
        reference maximum belongs to the last bin; values outside the
        reference range get NaN.
    """
    # Convert minutes to seconds in both dataframes if needed
    df['duration_sec'] = df['duration_in_min'] * 60

    if 'duration_sec' not in reference_df.columns:
        reference_df = reference_df.copy()
        reference_df['duration_sec'] = reference_df['duration_in_min'] * 60

    # Calculate bin edges from reference dataframe
    bin_edges = [reference_df['duration_sec'].min()]
    bin_edges.extend([
        reference_df['duration_sec'].quantile(q)
        for q in np.linspace(0, 1, num_bins+1)[1:]
    ])
    bin_edges = sorted(list(set(bin_edges)))  # Remove duplicates and sort

    # Each bin is labelled with its left edge
    labels = bin_edges[:-1]

    # The last edge is the reference maximum. With right=False the last interval
    # would exclude it and those rows would silently get NaN, so the upper edge
    # is nudged to the next representable float; labels are unaffected.
    cut_edges = bin_edges[:-1] + [np.nextafter(bin_edges[-1], np.inf)]

    # Apply these bins to the target dataframe
    df['duration_bin'] = pd.cut(
        df['duration_sec'],
        bins=cut_edges,
        labels=labels,
        right=False
    )

    return df

In [45]:
# One reference frame (all orders) defines the duration bin edges for both inputs,
# so orders and bids are binned consistently.
reference_df = df_orders_with_recprice.copy()

# NOTE(review): orders are filtered by condition_on_orders and copied, whereas
# df_bids_prepared is passed as is — it is neither filtered nor copied, so
# create_duration_bin writes its columns into df_bids_prepared itself. Confirm
# both are intended.
df_metrics_total = calculate_absolute_metrics(
    df_order_with_recprice=create_duration_bin(
        df_orders_with_recprice[condition_on_orders].copy(),
        reference_df=reference_df,
    ),
    df_bid=create_duration_bin(df_bids_prepared, reference_df=reference_df),
    group_cols=['group_name', 'duration_bin'],
)

# Turn the absolute sums/counts into the ratio metrics of ESSENTIAL_METRIC_LIST.
df_metrics_vs_ETR = calculate_ratio_metrics(
    df_metrics_total,
    metric_list=ESSENTIAL_METRIC_LIST,
)

In [None]:
# fig = go.Figure()
# for metric in [column for column in df_metrics_vs_ETR.columns if column not in ['group_name', 'duration_bin'] + pd.DataFrame(ESSENTIAL_METRIC_LIST)[0].tolist()]:
#     for group in [group for group in df_metrics_vs_ETR['group_name'].unique() if group not in ['Before']]:
#         fig.add_trace(go.Scatter(
#             x=df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == group]['duration_bin'].reset_index(drop=True),
#             y=df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == group][metric].reset_index(drop=True),
#             mode='lines + markers',
#             name=f'{metric} ({group})'
#         ))

# fig.update_layout(
#     height=900,
#     width=1300,
# )
# fig.show()

# fig = go.Figure()
# for metric in [column for column in df_metrics_vs_ETR.columns if column not in ['group_name', 'duration_bin'] + pd.DataFrame(ESSENTIAL_METRIC_LIST)[0].tolist()]:
#     fig.add_trace(go.Scatter(
#         x=df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'Control']['duration_bin'].reset_index(drop=True),
#         y=(df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'A'][metric].reset_index(drop=True) - 
#         df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'Control'][metric].reset_index(drop=True)) / 
#         df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'Control'][metric].reset_index(drop=True),
#         mode='lines + markers',
#         name=f'{metric} (Rel Diff)'
#         ))

# fig.update_layout(
#     height=900,
#     width=1300,
# )
# fig.show()

# Metrics drawn against the duration (ETR) bin in both figures below.
metrics_vs_etr = ['order2bid', 'bids_per_order_with_bid', 'bid2accept', 'order2accept', 'order2done',
                  'bids_per_order', 'price_bid_currency', 'price_bid_currency_accepted_bids', 'eta']

# Figure 1: absolute value of every metric, one trace per experiment group
# (the 'Before' group is left out).
fig = go.Figure()
for metric in metrics_vs_etr:
    for group in [group for group in df_metrics_vs_ETR['group_name'].unique() if group not in ['Before']]:
        df_group = df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == group]
        fig.add_trace(go.Scatter(
            x=df_group['duration_bin'].reset_index(drop=True),
            y=df_group[metric].reset_index(drop=True),
            mode='lines + markers',
            name=f'{metric} ({group})'
        ))

fig.update_layout(
    height=900,
    width=1300,
)
fig.show()

# Figure 2: relative difference (A - Control) / Control per duration bin.
# The two groups are paired on `duration_bin` rather than on row position, so a
# bin missing from one group can no longer shift the values of all later bins.
df_control = df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'Control']
df_test = df_metrics_vs_ETR[df_metrics_vs_ETR['group_name'] == 'A']

fig = go.Figure()
for metric in metrics_vs_etr:
    df_paired = df_control[['duration_bin', metric]].merge(
        df_test[['duration_bin', metric]],
        on='duration_bin',
        how='inner',
        suffixes=('_control', '_test'),
    )
    fig.add_trace(go.Scatter(
        x=df_paired['duration_bin'],
        y=(df_paired[f'{metric}_test'] - df_paired[f'{metric}_control']) / df_paired[f'{metric}_control'],
        mode='lines + markers',
        name=f'{metric}'
        ))

fig.update_layout(
    xaxis_title='ETR',
    yaxis_title='Value',
    yaxis_tickformat='.0%',
    template='plotly_white',
    height=900,
    width=1000,
    title='Metrics vs ETR, Rel Diff'
)

fig.show()


In [None]:
from src.metrics import metric_accepted_orders_count, metric_orders_count, metric_orders_without_bids_count


def _add_fixed_duration_bin(df, bin_width_sec=60*5):
    """Return a copy of `df` with `duration_sec` and fixed-width `duration_bin` columns.

    Bin edges stop below the 99th percentile of `duration_sec` rounded up to a
    whole minute; rows at or beyond the last edge get NaN. Labels are the left
    edge of each bin in seconds, as strings.
    """
    df_binned = df.copy()
    df_binned['duration_sec'] = df_binned['duration_in_min'] * 60
    duration_percentile = df_binned['duration_sec'].quantile(0.99)
    duration_max = int(np.ceil(duration_percentile / 60.0)) * 60

    df_binned['duration_bin'] = pd.cut(
        df_binned['duration_sec'],
        bins=np.arange(0, duration_max, bin_width_sec),
        labels=[f"{i}" for i in range(0, duration_max - bin_width_sec, bin_width_sec)],
        right=False
    )
    return df_binned


# algo_bidmph_share: rows with algo_name_new == 'algo_bidmph' over all bid rows
# NOTE(review): the only assignment of df_bids_filtered visible in this part of
# the notebook is commented out — confirm it is defined before this cell runs.
df_filtered = _add_fixed_duration_bin(df_bids_filtered)

# the mask is built from the same frame that is being indexed
df_numerator = (df_filtered[df_filtered['algo_name_new'] == 'algo_bidmph']
                .groupby(['duration_bin', 'group_name'])
                .size()
                .reset_index(name='bids_algo_bidmph_count'))
df_denominator = df_filtered.groupby(['duration_bin', 'group_name']).size().reset_index(name='bids_count')

df_tmp = df_numerator.merge(df_denominator, on=['duration_bin', 'group_name'], how='left')

df_tmp['algo_bidmph_share'] = df_tmp['bids_algo_bidmph_count'] / df_tmp['bids_count']
df_grouped = df_tmp[['duration_bin', 'group_name', 'algo_bidmph_share']]

# Order-based shares: the orders frame is filtered and binned once, then reused
# for order2accept and orders_without_bids_share.
condition_on_orders = df_orders_with_recprice.order_type == ORDER_TYPE
df_filtered = _add_fixed_duration_bin(df_orders_with_recprice[condition_on_orders])
df_denominator = metric_orders_count(df_filtered,
                                     group_cols=['duration_bin', 'group_name'])

for share_name, share_numerator_func, share_numerator_col in [
    ('order2accept', metric_accepted_orders_count, 'accepted_orders_count'),
    ('orders_without_bids_share', metric_orders_without_bids_count, 'orders_without_bids_count'),
]:
    df_numerator = share_numerator_func(df_filtered,
                                        group_cols=['duration_bin', 'group_name'])
    df_tmp = df_numerator.merge(df_denominator, on=['duration_bin', 'group_name'], how='left')
    df_tmp[share_name] = df_tmp[share_numerator_col] / df_tmp['orders_count']
    df_grouped = df_grouped.merge(df_tmp[['duration_bin', 'group_name', share_name]],
                                  on=['duration_bin', 'group_name'], how='left')

In [None]:
# The bin labels are strings of the bin's left edge in seconds ("0", "300", ...).
# Sort on the numeric value so bins are ordered 0, 300, 600, ... instead of
# lexicographically, and use plain strings on the x axis for every trace.
plot_df = (
    df_grouped
    .assign(
        bin_start_sec=lambda d: d['duration_bin'].astype(str).astype(int),
        duration_bin=lambda d: d['duration_bin'].astype(str),
    )
    .sort_values(by=['group_name', 'bin_start_sec'])
)

fig = px.line(
    plot_df,
    x='duration_bin',
    y='orders_without_bids_share',
    color='group_name',
    markers=True,
)

# One row per bin, one column per (metric, group). Comparing groups through this
# table aligns them on the bin itself; the previous version paired A and Control
# rows by position after reset_index, which silently mismatches bins whenever one
# group lacks a bin the other has.
wide = plot_df.pivot(
    index='bin_start_sec',
    columns='group_name',
    values=['orders_without_bids_share', 'algo_bidmph_share', 'order2accept'],
)
bin_labels = wide.index.astype(str)


def _line(name, values):
    """Build a line trace over the shared bin axis."""
    return go.Scatter(x=bin_labels, y=values, mode='lines', name=name)


no_bids = wide['orders_without_bids_share']
coverage = wide['algo_bidmph_share']
order2accept = wide['order2accept']

# Relative difference of A vs Control (a zero Control share yields inf/NaN, as before).
fig.add_trace(_line('Rel Diff', (no_bids['A'] - no_bids['Control']) / no_bids['Control']))
fig.add_trace(_line('coverage, Control', coverage['Control']))
fig.add_trace(_line('coverage, A', coverage['A']))
fig.add_trace(_line('order2accept, Control', order2accept['Control']))
fig.add_trace(_line('order2accept, A', order2accept['A']))
fig.add_trace(_line('order2accept, Diff', order2accept['A'] - order2accept['Control']))

fig.update_layout(
    xaxis_title='ETR',
    yaxis_title='Share',
    yaxis_tickformat='.0%',
    template='plotly_white',
    height=900,
    width=1000,
    title='orders_without_bids_share + coverage + order2accept'
)

fig.show()


##### Time Series

Get Data

In [186]:
# Grouping keys for the time-series metrics: experiment group and the `time` column.
GROUP_COLS = ['group_name', 'time']

In [187]:
# Row filters restricting each source table to the analysed order type.
condition_on_recprice = df_recprice_prepared.order_type_id == ORDER_TYPE_ID
condition_on_orders = df_orders_with_recprice.order_type == ORDER_TYPE
condition_on_bids = df_bids_prepared.order_type == ORDER_TYPE

# Calculate Absolute Metrics
df_metrics_absolute_ts = calculate_absolute_metrics(
    df_recprice_prepared[condition_on_recprice].copy(),
    df_orders_with_recprice[condition_on_orders].copy(),
    df_bids_prepared[condition_on_bids].copy(),
    group_cols=GROUP_COLS,
)

# Calculate Ratio Metrics
# The plotting cells that follow read `df_metrics_grouped`, so the ratio metrics
# must be stored under that name (this matches the other "Time Series" section of
# this notebook). `df_metrics_grouped_ts` is kept as an alias for existing users.
df_metrics_grouped = calculate_ratio_metrics(df_metrics_absolute_ts)
df_metrics_grouped_ts = df_metrics_grouped

Conversions

In [None]:
# Conversion plots over the `time` grouping column.
# `plot_root_path`, `is_before` and `is_show` are passed straight through to
# src.visualization.plot_conversions_by_time; their exact effect is defined there.
plot_conversions_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True,
)

Prices

In [None]:
# Price plots over the `time` grouping column (see src.visualization.plot_prices_by_time
# for the meaning of `is_before` / `is_show`).
plot_prices_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True,
)

Times

In [None]:
# Timing plots over the `time` grouping column (see src.visualization.plot_times_by_time
# for the meaning of `is_before` / `is_show`).
plot_times_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True
)

#### Results by ETR

In [None]:
# Median of the `etr` metric over Control rows; printed as-is and then divided
# by 60 to obtain the split threshold `value` used by the next cell.
# NOTE(review): `df_metrics_grouped_dist` is assigned in cells further down this
# notebook — confirm it is also defined before this point on a fresh kernel.
control_rows = df_metrics_grouped_dist[df_metrics_grouped_dist['group_name'] == 'Control']
control_median_etr = control_rows['etr'].median()
print(f"""Median ETR: {control_median_etr}sec""")
value = control_median_etr / 60

In [251]:
# Left half: rows of the analysed order type with duration <= threshold.
condition_on_recprice_L = (df_recprice_prepared.order_type_id == ORDER_TYPE_ID) & (df_recprice_prepared.log_duration_in_min <= value)
condition_on_orders_L = (df_orders_with_recprice.order_type == ORDER_TYPE) & (df_orders_with_recprice.log_duration_in_min <= value)
condition_on_bids_L = (df_bids_prepared.order_type == ORDER_TYPE) & (df_bids_prepared.duration_in_min <= value)

# Right half: strictly greater than the threshold in all three tables, so that a
# row sitting exactly on the threshold belongs to the left half only. Previously
# orders and bids used `>=`, which put boundary rows into both halves while
# recprice rows were split with `>`.
condition_on_recprice_R = (df_recprice_prepared.order_type_id == ORDER_TYPE_ID) & (df_recprice_prepared.log_duration_in_min > value)
condition_on_orders_R = (df_orders_with_recprice.order_type == ORDER_TYPE) & (df_orders_with_recprice.log_duration_in_min > value)
condition_on_bids_R = (df_bids_prepared.order_type == ORDER_TYPE) & (df_bids_prepared.duration_in_min > value)

In [None]:
pd.set_option('display.max_rows', None)


def _etr_half_results(recprice_mask, orders_mask, bids_mask):
    """Return (absolute metrics per switch window, switchback result table) for one half of the ETR split."""
    absolute = calculate_absolute_metrics(
        df_recprice_prepared[recprice_mask].copy(),
        df_orders_with_recprice[orders_mask].copy(),
        df_bids_prepared[bids_mask].copy(),
        group_cols=['group_name', 'switch_start_dttm', 'switch_finish_dttm'],
    )
    table = get_switchback_results(absolute, alpha=0.05)[
        ['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']
    ]
    return absolute, table


# Left half (<= threshold)
df_metrics_total, metrics_total_tbl_L = _etr_half_results(
    condition_on_recprice_L, condition_on_orders_L, condition_on_bids_L
)

# metrics_total_tbl.to_csv(DATA_ROOT_PATH / 'metrics_total_tbl.csv', index=False)
# print(f"""csv file saved to {DATA_ROOT_PATH / 'metrics_total_tbl.csv'}""")

# Right half; `df_metrics_total` ends up holding the right-half metrics.
df_metrics_total, metrics_total_tbl_R = _etr_half_results(
    condition_on_recprice_R, condition_on_orders_R, condition_on_bids_R
)

# Side-by-side table: one row per metric, columns suffixed _L / _R.
metrics_total_tbl = metrics_total_tbl_L.merge(metrics_total_tbl_R, on='metric', how='left', suffixes=('_L', '_R'))
metrics_total_tbl

##### __Plots__

##### Distributions

Get Data

In [None]:
# Grouping keys for the distribution plots: experiment group plus the switch start/finish timestamps.
GROUP_COLS = ['group_name', 'switch_start_dttm', 'switch_finish_dttm']

In [None]:
# Calculate Absolute Metrics
# Absolute metrics per GROUP_COLS combination for the analysed order type.
absolute_per_switch = calculate_absolute_metrics(
    df_recprice_prepared[condition_on_recprice].copy(),
    df_orders_with_recprice[condition_on_orders].copy(),
    df_bids_prepared[condition_on_bids].copy(),
    group_cols=GROUP_COLS
)

# Ratio metrics derived from the absolute ones.
df_metrics_grouped = calculate_ratio_metrics(absolute_per_switch)

# Keep the grouping keys plus the essential metrics only; the metric name is the
# first element of every ESSENTIAL_METRIC_LIST entry.
essential_metric_names = [entry[0] for entry in ESSENTIAL_METRIC_LIST]
df_metrics_grouped_dist = df_metrics_grouped[GROUP_COLS + essential_metric_names]

In [None]:
# Animate densities for the metrics that are actually present in the frame being
# plotted. The metric list used to come from `df_metrics_grouped`, which also
# contains columns that were filtered out of `df_metrics_grouped_dist`.
plot_density_anime(df_metrics_grouped_dist, metrics=df_metrics_grouped_dist.columns.difference(GROUP_COLS))

In [None]:
# Overlaid density of `price_bid_currency_accepted_bids`, 100 bins.
#plot_density_simple(df_metrics_grouped_dist, metric='rta', bins=70)
plot_density_overlay(df_metrics_grouped_dist, metric='price_bid_currency_accepted_bids', bins=100)

In [None]:
# Overlaid density of `price_bid_currency`, 100 bins.
plot_density_overlay(df_metrics_grouped_dist, metric='price_bid_currency', bins=100)

##### Time Series

Get Data

In [37]:
# Grouping keys for the time-series metrics: experiment group and the `time` column.
GROUP_COLS = ['group_name', 'time']

In [38]:
# Calculate Absolute Metrics
# Absolute metrics per (group, time) for the analysed order type.
absolute_by_time = calculate_absolute_metrics(
    df_recprice_prepared[condition_on_recprice].copy(),
    df_orders_with_recprice[condition_on_orders].copy(),
    df_bids_prepared[condition_on_bids].copy(),
    group_cols=GROUP_COLS,
)

# Ratio metrics; this is the frame the plotting cells below consume.
df_metrics_grouped = calculate_ratio_metrics(absolute_by_time)

Conversions

In [None]:
# Conversion plots over the `time` grouping column.
# `plot_root_path`, `is_before` and `is_show` are passed straight through to
# src.visualization.plot_conversions_by_time; their exact effect is defined there.
plot_conversions_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True,
)

Prices

In [None]:
# Price plots over the `time` grouping column (see src.visualization.plot_prices_by_time
# for the meaning of `is_before` / `is_show`).
plot_prices_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True,
)

Times

In [None]:
# Timing plots over the `time` grouping column (see src.visualization.plot_times_by_time
# for the meaning of `is_before` / `is_show`).
plot_times_by_time(
    df_metrics_grouped,
    grouped_column='time',
    plot_root_path=PLOT_ROOT_PATH,
    is_before=True,
    is_show=True
)

#### Results by ETA

In [None]:
# Median of the `eta` metric over Control rows, used as the split threshold.
control_rows = df_metrics_grouped_dist[df_metrics_grouped_dist['group_name'] == 'Control']
control_median_eta = control_rows['eta'].median()

# The label used to say "ETR" although the metric read here is `eta`.
print(f"""Median ETA: {control_median_eta}sec""")

# The threshold is compared directly with `df_bids_prepared.eta` in the next cell,
# so it must stay in the same unit as that column. The previous `/ 60` (copied
# from the ETR section, where the comparison column is in minutes) shrank it 60x.
# NOTE(review): assumes the `eta` metric and `df_bids_prepared.eta` are both in
# seconds, as the printed "sec" label suggests — confirm.
value = control_median_eta

In [None]:
# Left half: bids of the analysed order type with eta <= threshold.
condition_on_bids_L = (df_bids_prepared.order_type == ORDER_TYPE) & (df_bids_prepared.eta <= value)

# Right half: strictly greater than the threshold, so a bid sitting exactly on
# the threshold is counted once (left) rather than in both halves.
condition_on_bids_R = (df_bids_prepared.order_type == ORDER_TYPE) & (df_bids_prepared.eta > value)

In [None]:
pd.set_option('display.max_rows', None)


def _eta_half_results(bids_mask):
    """Return (absolute bid metrics per switch window, switchback result table) for one half of the ETA split."""
    absolute = calculate_absolute_metrics(
        df_bid=df_bids_prepared[bids_mask].copy(),
        group_cols=['group_name', 'switch_start_dttm', 'switch_finish_dttm'],
    )
    table = get_switchback_results(absolute, alpha=0.05)[
        ['metric', 'control_value', 'experimental_value', 'uplift_rel', 'pvalue', 'is_significant']
    ]
    return absolute, table


# Left half (eta <= threshold)
df_metrics_total, metrics_total_tbl_L = _eta_half_results(condition_on_bids_L)

# metrics_total_tbl.to_csv(DATA_ROOT_PATH / 'metrics_total_tbl.csv', index=False)
# print(f"""csv file saved to {DATA_ROOT_PATH / 'metrics_total_tbl.csv'}""")

# Right half; `df_metrics_total` ends up holding the right-half metrics.
df_metrics_total, metrics_total_tbl_R = _eta_half_results(condition_on_bids_R)

# Side-by-side table: one row per metric, columns suffixed _L / _R.
metrics_total_tbl = metrics_total_tbl_L.merge(metrics_total_tbl_R, on='metric', how='left', suffixes=('_L', '_R'))
metrics_total_tbl

# Foot note

##### More heatmaps

In [None]:
from src.metrics import metric_price_bid_done_currency_sum, metric_orders_count

# Total orders per experiment group: the fixed denominator of the heatmap.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))

# Sum of done-bid prices divided by the group's total order count.
plot_heatmap(
    df_bids_prepared,
    metric='GMV per ordersTotal',
    numerator_func=metric_price_bid_done_currency_sum,
    denominator_func=None,
    denom_values_dict=orders_count_by_group,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    # zmin1=0.0, zmax1=0.02, zmin2=None, zmax2=None,
    num_bins=20,
)

In [None]:
from src.metrics import metric_price_bid_done_option_startprice_currency_sum, metric_price_bid_done_option_option1_currency_sum
from src.metrics import metric_price_bid_done_option_option2_currency_sum, metric_price_bid_done_option_option3_currency_sum

# Total orders per experiment group, computed once and shared by all four
# heatmaps (it used to be recomputed twice per call).
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))


def _plot_gmv_option_heatmap(metric, numerator_func):
    """Heatmap of `numerator_func` over bids, divided by the group's total order count.

    Returns whatever `plot_heatmap` returns.
    """
    return plot_heatmap(
        df_bids_prepared,
        metric=metric,
        numerator_func=numerator_func,
        denominator_func=None,
        # Fresh copy per call so one call cannot affect the next through the dict.
        denom_values_dict=dict(orders_count_by_group),
        min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
        #  zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
        num_bins=20,
    )


# Explicit calls (not a loop) so the last call stays the cell's final expression.
_plot_gmv_option_heatmap('GMV_SP per ordersTOTAL', metric_price_bid_done_option_startprice_currency_sum)
_plot_gmv_option_heatmap('GMV_Op1 per ordersTOTAL', metric_price_bid_done_option_option1_currency_sum)
_plot_gmv_option_heatmap('GMV_Op2 per ordersTOTAL', metric_price_bid_done_option_option2_currency_sum)
_plot_gmv_option_heatmap('GMV_Op3 per ordersTOTAL', metric_price_bid_done_option_option3_currency_sum)

In [None]:
# Total orders per experiment group: the fixed denominator of the heatmap.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))

# algo_bidmph bid count divided by the group's total order count.
# NOTE(review): `metric_algo_bidmph_bids_count` is not imported in the nearby
# cells — confirm it is imported earlier in the notebook.
plot_heatmap(
    df_bids_prepared,
    metric='badbids per ordersTOTAL',
    numerator_func=metric_algo_bidmph_bids_count,
    denominator_func=None,
    denom_values_dict=orders_count_by_group,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    zmin1=0.0,
    zmax1=0.02,
    zmin2=None,
    zmax2=None,
)

In [None]:
# Total orders per experiment group: the fixed denominator of the heatmap.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))

# algo_default bid count divided by the group's total order count.
# NOTE(review): `metric_algo_default_bids_count` is not imported in the nearby
# cells — confirm it is imported earlier in the notebook.
plot_heatmap(
    df_bids_prepared,
    metric='defaultbids per ordersTOTAL',
    numerator_func=metric_algo_default_bids_count,
    denominator_func=None,
    denom_values_dict=orders_count_by_group,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    zmin1=0.0,
    zmax1=0.02,
    zmin2=None,
    zmax2=None,
)

In [None]:
# Total orders per experiment group: the fixed denominator of the heatmap.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))

# Bid count divided by the group's total order count.
plot_heatmap(
    df_bids_prepared,
    metric='bids per orderTOTAL',
    numerator_func=metric_bids_count,
    denominator_func=None,
    denom_values_dict=orders_count_by_group,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    # zmin1=0.0, zmax1=0.02, zmin2=None, zmax2=None
    num_bins=20,
)

In [None]:
from src.metrics import metric_bids_option_startprice_count, metric_bids_option_option1_count
from src.metrics import metric_bids_option_option2_count, metric_bids_option_option3_count

# Total orders per experiment group, computed once and shared by all four
# heatmaps (it used to be recomputed twice per call).
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))


def _plot_bids_option_heatmap(metric, numerator_func):
    """Heatmap of a per-option bid count divided by the group's total order count.

    Returns whatever `plot_heatmap` returns.
    """
    return plot_heatmap(
        df_bids_prepared,
        metric=metric,
        numerator_func=numerator_func,
        denominator_func=None,
        # Fresh copy per call so one call cannot affect the next through the dict.
        denom_values_dict=dict(orders_count_by_group),
        min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
        #  zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
        num_bins=20,
    )


# Explicit calls (not a loop) so the last call stays the cell's final expression.
_plot_bids_option_heatmap('Bids_SP per ordersTOTAL', metric_bids_option_startprice_count)
_plot_bids_option_heatmap('Bids_Op1 per ordersTOTAL', metric_bids_option_option1_count)
_plot_bids_option_heatmap('Bids_Op2 per ordersTOTAL', metric_bids_option_option2_count)
_plot_bids_option_heatmap('Bids_Op3 per ordersTOTAL', metric_bids_option_option3_count)

In [None]:
# Total orders per experiment group: the fixed denominator of the heatmap.
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))

# Accepted-bid count divided by the group's total order count.
plot_heatmap(
    df_bids_prepared,
    metric='Accepts per ordersTOTAL',
    numerator_func=metric_accepted_bids_count,
    denominator_func=None,
    denom_values_dict=orders_count_by_group,
    min_samples=20,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID,
    # zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1,
    # num_bins=5
)

In [None]:
from src.metrics import metric_accepted_bids_option_startprice_count, metric_accepted_bids_option_option1_count
from src.metrics import metric_accepted_bids_option_option2_count, metric_accepted_bids_option_option3_count

# Total orders per experiment group, computed once and shared by all four
# heatmaps (it used to be recomputed twice per call).
orders_per_group = metric_orders_count(df_orders_with_recprice, ['group_name'])
orders_count_by_group = dict(zip(orders_per_group['group_name'], orders_per_group['orders_count']))


def _plot_accepts_option_heatmap(metric, numerator_func):
    """Heatmap of a per-option accepted-bid count divided by the group's total order count.

    Returns whatever `plot_heatmap` returns.
    """
    return plot_heatmap(
        df_bids_prepared,
        metric=metric,
        numerator_func=numerator_func,
        denominator_func=None,
        # Fresh copy per call so one call cannot affect the next through the dict.
        denom_values_dict=dict(orders_count_by_group),
        min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
        #  zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
        num_bins=20,
    )


# Explicit calls (not a loop) so the last call stays the cell's final expression.
_plot_accepts_option_heatmap('Accepts_SP per ordersTOTAL', metric_accepted_bids_option_startprice_count)
_plot_accepts_option_heatmap('Accepts_Op1 per ordersTOTAL', metric_accepted_bids_option_option1_count)
_plot_accepts_option_heatmap('Accepts_Op2 per ordersTOTAL', metric_accepted_bids_option_option2_count)
_plot_accepts_option_heatmap('Accepts_Op3 per ordersTOTAL', metric_accepted_bids_option_option3_count)

In [None]:
# Acceptance rate heatmap: accepted-bid count divided by bid count, both computed
# from `df_bids_prepared` via the numerator/denominator functions.
plot_heatmap(df_bids_prepared, 
             metric='accepted_bids_share', 
             numerator_func=metric_accepted_bids_count, denominator_func=metric_bids_count, 
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
             num_bins=20
             )

In [None]:
from src.metrics import metric_accepted_bids_option_startprice_count, metric_accepted_bids_option_option1_count
from src.metrics import metric_accepted_bids_option_option2_count, metric_accepted_bids_option_option3_count


def _plot_acceptance_rate_heatmap(metric, numerator_func, denominator_func):
    """Heatmap of per-option accepted bids divided by per-option bids.

    Returns whatever `plot_heatmap` returns.
    """
    return plot_heatmap(
        df_bids_prepared,
        metric=metric,
        numerator_func=numerator_func,
        denominator_func=denominator_func,
        min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID,
        #  zmin1=0.0, zmax1=0.02, zmin2=-0.1, zmax2=0.1
        num_bins=20,
    )


# NOTE(review): the metric labels say "per ordersTOTAL", but the denominators
# passed here are the per-option bid counts. Labels are left unchanged because
# `plot_heatmap` may use them for titles or file names — confirm and rename.
# NOTE(review): the `metric_bids_option_*_count` functions are not imported in
# this cell; they rely on an import made in another cell.
# Explicit calls (not a loop) so the last call stays the cell's final expression.
_plot_acceptance_rate_heatmap('AR_SP per ordersTOTAL',
                              metric_accepted_bids_option_startprice_count,
                              metric_bids_option_startprice_count)
_plot_acceptance_rate_heatmap('AR_Op1 per ordersTOTAL',
                              metric_accepted_bids_option_option1_count,
                              metric_bids_option_option1_count)
_plot_acceptance_rate_heatmap('AR_Op2 per ordersTOTAL',
                              metric_accepted_bids_option_option2_count,
                              metric_bids_option_option2_count)
_plot_acceptance_rate_heatmap('AR_Op3 per ordersTOTAL',
                              metric_accepted_bids_option_option3_count,
                              metric_bids_option_option3_count)

In [None]:
# Heatmap of the summed first-bid-to-accept time divided by the accepted-bid count.
from src.metrics import metric_time_1st_bid_to_accept_sec_sum
plot_heatmap(df_bids_prepared, 
             metric='time_1st_bid_to_accept', 
             numerator_func=metric_time_1st_bid_to_accept_sec_sum,
             denominator_func=metric_accepted_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
# Heatmap restricted to bids whose algo_name_new is 'algo_bidmph':
# summed bid price divided by bid count.
plot_heatmap(df_bids_prepared[df_bids_prepared['algo_name_new'] == 'algo_bidmph'], 
             metric='algo_bidmph_price_bid_currency', 
             numerator_func=metric_bids_bid_price_currency_sum,
             denominator_func=metric_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
# Heatmap restricted to bids whose algo_name_new is 'algo_default':
# summed bid price divided by bid count.
plot_heatmap(df_bids_prepared[df_bids_prepared['algo_name_new'] == 'algo_default'], 
             metric='algo_default_price_bid_currency', 
             numerator_func=metric_bids_bid_price_currency_sum,
             denominator_func=metric_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
# Heatmap restricted to bids whose algo_name_new is 'algo_default':
# summed bid price divided by bid count.
# NOTE(review): this call is identical to the 'algo_default' heatmap in the
# preceding cell — presumably a copy-paste leftover or meant for a different
# filter/metric; confirm and remove or correct.
plot_heatmap(df_bids_prepared[df_bids_prepared['algo_name_new'] == 'algo_default'], 
             metric='algo_default_price_bid_currency', 
             numerator_func=metric_bids_bid_price_currency_sum,
             denominator_func=metric_bids_count,
             min_samples=20, PLOT_ROOT_PATH=PLOT_ROOT_PATH, EXP_ID=EXP_ID)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import pandas as pd

def plot_heatmap_with_distributions_plotly(df_bids, df_orders, x_metric='duration_sec', y_metric='eta', 
                                          bins=50, PLOT_ROOT_PATH=None, EXP_ID=None):
    """
    Create an interactive heatmap with marginal distributions along each axis using Plotly.

    The x values come from ``df_orders[x_metric]`` and the y values from
    ``df_bids[y_metric]``. The two columns live in different tables and are never
    joined row by row, so the heatmap is NOT an observed joint distribution: it
    is the outer product of the two marginal histograms (what the joint counts
    would look like if x and y were independent). Earlier versions added random
    noise to that grid "to look more natural"; the noise has been removed so the
    figure shows only values derived from the data and the global NumPy random
    state is no longer reseeded.

    Args:
        df_bids: frame with ``group_name`` and ``y_metric`` columns.
        df_orders: frame with ``group_name`` and ``x_metric`` columns.
        x_metric: column of ``df_orders`` plotted on the x axis.
        y_metric: column of ``df_bids`` plotted on the y axis.
        bins: number of bins per axis for the heatmap and the marginal histograms.
        PLOT_ROOT_PATH: currently unused (saving is disabled, see the end of the body).
        EXP_ID: currently unused (saving is disabled, see the end of the body).

    Returns:
        The Plotly figure, or ``None`` (after printing a message) when either
        column has no numeric values or its 1st and 99th percentiles coincide.
    """

    def _numeric_values(frame, column):
        """Return the column's numeric entries as a float array; non-numeric and missing entries are dropped."""
        return pd.to_numeric(frame[column], errors='coerce').dropna().to_numpy(dtype=float)

    # Get data (numeric entries only)
    x = _numeric_values(df_orders, x_metric)
    y = _numeric_values(df_bids, y_metric)

    # Check that there is data to plot
    if len(x) == 0 or len(y) == 0:
        print("No valid data found for x or y. Please check your data.")
        return None

    # Calculate reasonable range limits (1st-99th percentile to exclude outliers)
    x_min, x_max = np.percentile(x, [1, 99])
    y_min, y_max = np.percentile(y, [1, 99])

    # A zero-width range cannot be split into increasing bin edges.
    if x_min == x_max or y_min == y_max:
        print("Degenerate value range for x or y (1st and 99th percentiles are equal); nothing to plot.")
        return None

    # Create subplot layout: top-left = x marginal, bottom-left = heatmap,
    # bottom-right = y marginal, top-right left empty.
    fig = make_subplots(
        rows=2, cols=2,
        column_widths=[0.8, 0.2],
        row_heights=[0.2, 0.8],
        specs=[
            [{"type": "histogram"}, {"type": "scatter"}],
            [{"type": "heatmap"}, {"type": "histogram"}]
        ],
        horizontal_spacing=0.05,
        vertical_spacing=0.05
    )

    # Filter out extreme values for better visualization
    x_filtered = x[(x >= x_min) & (x <= x_max)]
    y_filtered = y[(y >= y_min) & (y <= y_max)]

    # Create bins for x and y axes
    x_bins = np.linspace(x_min, x_max, bins+1)
    y_bins = np.linspace(y_min, y_max, bins+1)

    # Calculate bin centers for heatmap
    x_centers = (x_bins[:-1] + x_bins[1:]) / 2
    y_centers = (y_bins[:-1] + y_bins[1:]) / 2

    # Create histograms for each axis
    x_hist, _ = np.histogram(x_filtered, bins=x_bins)
    y_hist, _ = np.histogram(y_filtered, bins=y_bins)

    # Product of the marginal counts (independence assumption, see docstring).
    heatmap_data = np.outer(x_hist, y_hist).astype(float)

    # Create heatmap
    fig.add_trace(
        go.Heatmap(
            z=heatmap_data.T,  # Transpose for correct orientation
            x=x_centers,
            y=y_centers,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(
                title="Density",
                x=0.85,
                y=0.45
            )
        ),
        row=2, col=1
    )

    def _add_group_histograms(frame, column, lower, upper, axis, row, col):
        """Add one density histogram per experiment group for `column`, drawn along `axis` ('x' or 'y')."""
        for group_name, group_df in frame.groupby('group_name'):
            group_data = _numeric_values(group_df, column)
            group_data = group_data[(group_data >= lower) & (group_data <= upper)]

            if len(group_data) < 2:  # Minimum needed for histogram
                continue

            color = 'blue' if group_name == 'Control' else 'orange'

            # `axis` selects between x=/nbinsx= (vertical bars) and y=/nbinsy= (horizontal bars).
            axis_kwargs = {axis: group_data, f"nbins{axis}": bins}
            fig.add_trace(
                go.Histogram(
                    marker_color=color,
                    opacity=0.6,
                    name=f"{group_name} - {column}",
                    histnorm='probability density',
                    **axis_kwargs
                ),
                row=row, col=col
            )

    # Add X-axis distribution (Top histogram)
    _add_group_histograms(df_orders, x_metric, x_min, x_max, axis='x', row=1, col=1)

    # Add Y-axis distribution (Right histogram)
    _add_group_histograms(df_bids, y_metric, y_min, y_max, axis='y', row=2, col=2)

    # Update layout
    fig.update_layout(
        title={
            'text': 'Heatmap of Bids Distribution by ETR and ETA',
            'y':0.98,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        },
        xaxis_title=f"ETR ({x_metric})",
        yaxis_title=f"ETA ({y_metric})",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5
        ),
        height=800,
        width=900,
        hovermode='closest',
        template='plotly_white'
    )

    # Update axes
    fig.update_yaxes(title_text=f"ETA ({y_metric})", row=2, col=1)
    fig.update_xaxes(title_text=f"ETR ({x_metric})", row=2, col=1)
    fig.update_yaxes(title_text="Density", row=1, col=1)
    fig.update_xaxes(title_text="Density", row=2, col=2)

    # Synchronize axes
    fig.update_xaxes(range=[x_min, x_max], row=2, col=1)
    fig.update_xaxes(range=[x_min, x_max], row=1, col=1)
    fig.update_yaxes(range=[y_min, y_max], row=2, col=1)
    fig.update_yaxes(range=[y_min, y_max], row=2, col=2)

    # # Save if path is provided
    # if PLOT_ROOT_PATH and EXP_ID:
    #     import os
    #     os.makedirs(f"{PLOT_ROOT_PATH}/{EXP_ID}", exist_ok=True)
    #     fig.write_html(f"{PLOT_ROOT_PATH}/{EXP_ID}/heatmap_with_distributions.html")
    #     fig.write_image(f"{PLOT_ROOT_PATH}/{EXP_ID}/heatmap_with_distributions.png", scale=2)

    return fig

# Build the figure.
# NOTE(review): `duration_sec` is read from `df_orders_with_recprice` here; in the
# cells visible above it is only ever added to filtered copies — confirm the
# column exists on this frame.
fig = plot_heatmap_with_distributions_plotly(
    df_bids=df_bids_prepared,
    df_orders=df_orders_with_recprice,
    x_metric='duration_sec',  # ETR column in orders
    y_metric='eta',           # ETA column in bids
    bins=50,
    PLOT_ROOT_PATH=PLOT_ROOT_PATH,
    EXP_ID=EXP_ID
)

# Show the interactive figure. The function returns None (after printing a
# message) when there is nothing to plot, so guard against calling .show() on None.
if fig is not None:
    fig.show()

##### Bimodal ETA issue discovery

In [None]:
# Grouping keys: local hour, experiment group and the switch start/finish timestamps.
GROUP_COLS = ['local_hour', 'group_name', 'switch_start_dttm', 'switch_finish_dttm']

# Calculate Absolute Metrics
df_metrics_grouped = calculate_absolute_metrics(
    df_recprice_prepared[condition_on_recprice].copy(),
    df_orders_with_recprice[condition_on_orders].copy(),
    df_bids_prepared[condition_on_bids].copy(),
    group_cols=GROUP_COLS
)

# Calculate Ratio Metrics
df_metrics_grouped= calculate_ratio_metrics(df_metrics_grouped)

# Filter essential metrics
df_metrics_grouped_dist = df_metrics_grouped[GROUP_COLS + pd.DataFrame(ESSENTIAL_METRIC_LIST)[0].tolist()]

# Draw density plots
metric = 'eta'
bin_col = 'local_hour'
bins = 100

# Remove extreme values (above 99th percentile)
data = df_metrics_grouped_dist[df_metrics_grouped_dist[metric] <= df_metrics_grouped_dist[metric].quantile(0.99)].copy()

# Define common range (used for the x axis below)
x_range = [data[metric].min(), data[metric].max()]

# Create figure
fig = go.Figure()

# One overlaid histogram per (group, local hour). The loop variable is named
# `hour_bin` rather than `bin` so the builtin `bin()` is not shadowed for the
# rest of the notebook session.
for group in data['group_name'].unique():
    group_rows = data[data['group_name'] == group]
    for hour_bin in data[bin_col].unique():
        fig.add_trace(go.Histogram(
                x=group_rows[group_rows[bin_col] == hour_bin][metric],
                nbinsx=bins,
                histnorm='probability density',
                marker_color='steelblue',
                opacity=0.5,
                name=f"""{group} bin:{hour_bin}"""
            ))

# Update layout (kept as the last expression so the figure is displayed)
fig.update_layout(
        title_text=f"Metric: {metric}",
        xaxis_title='Value',
        yaxis_title='Density',
        bargap=0.05,
        barmode='overlay',  # Enables overlapping histograms
        template="simple_white",
        height=600,
        width=1200,
        xaxis_range=x_range
    )

In [None]:
# Per (local_hour, group_name): number of bids with a non-null `eta`, how many
# of them have eta below 230, and the resulting share.
# NOTE(review): the unit of the 230 threshold is not shown here — presumably seconds; confirm.
df_grouped = (
    df_bids_prepared[condition_on_bids]
    .groupby(['local_hour', 'group_name'])
    .agg(
        total_count=('eta', 'count'),
        count_eta_less_230=('eta', lambda eta: eta.lt(230).sum()),
    )
    .reset_index()
    .assign(share_less_230=lambda agg: agg['count_eta_less_230'] / agg['total_count'])
)

# Summary statistics of the aggregated table (shown as the cell output)
df_grouped.sort_values(['group_name', 'local_hour']).describe()

In [None]:
# Share of bids with ETA < 230 by local hour, one coloured line per `group_name`.
# `update_layout` returns the figure itself, so it is chained onto the constructor.
fig = px.line(
    data_frame=df_grouped.sort_values(['group_name', 'local_hour']),
    x='local_hour',
    y='share_less_230',
    color='group_name',
    markers=True,
    title='Share of bids with ETA < 230',
).update_layout(**{
    'xaxis_title': 'Local Hour',
    'yaxis_title': 'Share',
    'yaxis_tickformat': '.0%',  # render shares as whole percentages
    'template': 'plotly_white',
    'height': 600,
    'width': 1200,
})

fig.show()


In [None]:
# Overlayed density histograms of bid-level `eta`, one trace per
# (group_name, local_hour) pair, taken straight from df_bids_prepared.
# NOTE(review): GROUP_COLS is not read anywhere in this cell; the assignment is
# kept in case later cells rely on it.
GROUP_COLS = ['local_hour', 'group_name', 'switch_start_dttm', 'switch_finish_dttm']

# Draw density plots
metric = 'eta'
bin_col = 'local_hour'
bins = 100

# Remove extreme values (above 99th percentile)
data = df_bids_prepared[df_bids_prepared[metric] <= df_bids_prepared[metric].quantile(0.99)].copy()

# Define common range (reused below for the x axis instead of recomputing it)
x_range = [data[metric].min(), data[metric].max()]

# Create figure
fig = go.Figure()

for group in data['group_name'].unique():
    # The group mask does not depend on the inner loop, so evaluate it once per group.
    group_rows = data[data['group_name'] == group]
    # `bin_value` rather than `bin`: a top-level loop variable named `bin` would
    # keep shadowing the builtin for every later cell of the notebook.
    for bin_value in data[bin_col].unique():
        fig.add_trace(go.Histogram(
            x=group_rows[group_rows[bin_col] == bin_value][metric],
            nbinsx=bins,
            histnorm='probability density',
            marker_color='steelblue',
            opacity=0.5,
            name=f"""{group} bin:{bin_value}"""
        ))

# Update layout; as the last expression of the cell, the returned figure is what gets displayed
fig.update_layout(
    title_text=f"Metric: {metric}",
    xaxis_title='Value',
    yaxis_title='Density',
    bargap=0.05,
    barmode='overlay',  # Enables overlapping histograms
    template="simple_white",
    height=600,
    width=1200,
    xaxis_range=x_range
)