In [13]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from get_data_copy import get_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
def get_city_params(city_params):
    """Yield one ``(city_id, eta, type_name)`` tuple per city configuration.

    ``city_params`` maps a city id either to a single config dict or to a
    list of config dicts (as city 4373 does). Every config must provide
    ``'eta'`` and a ``'type_name'`` sequence; only the first element of
    ``'type_name'`` is yielded.

    The whole mapping is printed once, when iteration starts.
    """
    print(city_params)
    for city_id, entry in city_params.items():
        # Wrap a single config in a list so both shapes share one code path.
        configs = entry if isinstance(entry, list) else [entry]
        for cfg in configs:
            yield city_id, cfg['eta'], cfg['type_name'][0]

def city_data(city_params, start_date='2025-02-01', stop_date='2025-02-28'):
    """Fetch data for every city configuration and stack the results.

    Parameters
    ----------
    city_params : dict
        Mapping accepted by ``get_city_params`` (city id -> config dict or
        list of config dicts with ``'eta'`` and ``'type_name'``).
    start_date, stop_date : str
        Date window forwarded unchanged to ``get_data``. The defaults keep
        the window that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation (fresh 0..n-1 index) of the frames returned
        by ``get_data``, in configuration order. An empty DataFrame when
        ``city_params`` yields no configuration.
    """
    frames = []

    for city_id, eta, type_name in get_city_params(city_params):
        print(f"{city_id}, {eta}, {type_name}")
        print(f'{city_id}...')
        df_temp = get_data(start_date=start_date, stop_date=stop_date,
                           pickup_eta_minutes=eta,
                           city_id=city_id,
                           type_name=type_name)
        print(f'{city_id} done')
        frames.append(df_temp)

    if not frames:
        # pd.concat raises ValueError("No objects to concatenate") on an
        # empty list; return an empty frame instead.
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

In [3]:
# Experiment configuration keyed by city id.
# Each value is one config, or a list of configs when the city has several
# (4373). 'eta' is what city_data passes to get_data as pickup_eta_minutes.
# The type names keep their embedded single quotes; create_data_dict strips
# them before comparing with the data (presumably get_data needs them quoted
# -- TODO confirm).
city_params = {
    4515: dict(eta=4.07, type_name=["'auto_econom'"]),
    4180: dict(eta=4.24, type_name=["'auto_econom'"]),
    4373: [
        dict(eta=6.10, type_name=["'moto_econom'"]),
        dict(eta=7.48, type_name=["'auto_econom'"]),
    ],
    4196: dict(eta=5.12, type_name=["'auto_econom'"]),
    4241: dict(eta=3.75, type_name=["'auto_econom'"]),
}

In [5]:
# Fetch the metrics for every configured city, persist them, then preview.
df = city_data(city_params)
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs on other machines.
df.to_csv(
    "/Users/georgiinusuev/PycharmProjects/work/badbids/exp_cities/output.csv",
    index=False,
)
# Transposed view: one column per city/type configuration.
df.T

{4515: {'eta': 4.07, 'type_name': ["'auto_econom'"]}, 4180: {'eta': 4.24, 'type_name': ["'auto_econom'"]}, 4373: [{'eta': 6.1, 'type_name': ["'moto_econom'"]}, {'eta': 7.48, 'type_name': ["'auto_econom'"]}], 4196: {'eta': 5.12, 'type_name': ["'auto_econom'"]}, 4241: {'eta': 3.75, 'type_name': ["'auto_econom'"]}}
4515, 4.07, 'auto_econom'
4515...
4515 done
4180, 4.24, 'auto_econom'
4180...
4180 done
4373, 6.1, 'moto_econom'
4373...
4373 done
4373, 7.48, 'auto_econom'
4373...
4373 done
4196, 5.12, 'auto_econom'
4196...
4196 done
4241, 3.75, 'auto_econom'
4241...
4241 done


Unnamed: 0,0,1,2,3,4,5
city_id,4515,4180,4373,4373,4196,4241
type_name,auto_econom,auto_econom,moto_econom,auto_econom,auto_econom,auto_econom
rides_cnt,901525,599401,377337,288311,408601,903357
t_param,4.07,4.24,6.1,7.48,5.12,3.75
badbids_share_00,0.726889,0.663652,0.724388,0.794904,0.796652,0.674566
badbid_ratio_avg_00,1.195608,1.163848,1.330198,1.393964,1.255654,1.186291
rides_at_risk_00,0.636433,0.578085,0.649756,0.72735,0.732646,0.564442
badbids_share_05,0.615007,0.513415,0.648904,0.738093,0.697589,0.587461
badbid_ratio_avg_05,1.227881,1.20541,1.366117,1.422268,1.289247,1.209533
rides_at_risk_05,0.483838,0.388148,0.54507,0.651487,0.589529,0.46579


In [None]:
def create_data_dict(df, city_params, cities=None, types=None, alphas=None):
    """Reshape the wide per-city metrics frame into long form (one row per alpha).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``city_id`` and ``type_name`` and, for every requested
        alpha, the columns ``badbids_share_<suffix>`` and
        ``rides_at_risk_<suffix>``.
    city_params : dict
        City id -> config dict, or list of config dicts, each holding
        ``'eta'`` and ``'type_name'`` (first entry, optionally wrapped in
        single quotes, is compared with ``df['type_name']``).
    cities, types : sequence, optional
        Keep only rows whose ``city_id`` / ``type_name`` is listed.
        ``None`` or an empty sequence means "no filter".
    alphas : sequence of str, optional
        Percent labels to extract. Defaults to
        ``["0%", "5%", "10%", "12.50%", "15%"]``.

    Returns
    -------
    pandas.DataFrame
        Columns ``city`` (str), ``type``, ``t_param``, ``param``,
        ``badbids_share``, ``rides_at_risk``; ``len(alphas)`` rows per
        input row, in input order. ``t_param`` is ``None`` when no config
        of the city matches the row's type.

    Raises
    ------
    KeyError
        If a city id in ``df`` is missing from ``city_params`` or an
        expected metric column is missing from ``df``.
    """
    # None instead of a list default: avoids a shared mutable default.
    if alphas is None:
        alphas = ["0%", "5%", "10%", "12.50%", "15%"]
    else:
        alphas = list(alphas)

    # Labels whose column suffix differs from "digits with % and . removed".
    suffix_overrides = {"0": "00", "5": "05", "1250": "125"}

    def param_to_column_suffix(p):
        """Convert a percent label such as '12.50%' to its column suffix ('125')."""
        digits = p.replace("%", "").replace(".", "")
        return suffix_overrides.get(digits, digits)

    def get_eta(city_id, type_name):
        """Return the eta configured for (city_id, type_name), or None if no config matches."""
        params = city_params[city_id]
        # Single-config cities are handled as a one-element list.
        configs = params if isinstance(params, list) else [params]
        for config in configs:
            if config['type_name'][0].strip("'") == type_name:
                return config['eta']
        return None

    # Start from an all-True boolean Series. The previous scalar `True` made
    # `df[mask]` a column lookup (KeyError: True) when no filter was given.
    mask = pd.Series(True, index=df.index, dtype=bool)
    if cities is not None and len(cities) > 0:
        mask &= df['city_id'].isin(cities)
    if types is not None and len(types) > 0:
        mask &= df['type_name'].isin(types)
    df_filtered = df[mask]

    # Column suffixes do not depend on the row; compute them once.
    suffixes = [param_to_column_suffix(p) for p in alphas]

    data = {
        "city": [],
        "type": [],
        "t_param": [],
        "param": [],
        "badbids_share": [],
        "rides_at_risk": []
    }

    # Expand every filtered row into len(alphas) long-format rows.
    for _, row in df_filtered.iterrows():
        city_id = int(row['city_id'])
        type_name = row['type_name']
        eta = get_eta(city_id, type_name)

        data["city"].extend([str(city_id)] * len(alphas))
        data["type"].extend([type_name] * len(alphas))
        data["t_param"].extend([eta] * len(alphas))
        data["param"].extend(alphas)
        data["badbids_share"].extend(row[f'badbids_share_{s}'] for s in suffixes)
        data["rides_at_risk"].extend(row[f'rides_at_risk_{s}'] for s in suffixes)

    return pd.DataFrame(data)

# Usage: always keep auto_econom; add moto_econom only when city 4373 is
# present in the fetched data.
selected_types = ['auto_econom']
if 4373 in df['city_id'].unique():
    selected_types = selected_types + ['moto_econom']


def _param_as_number(column):
    """Sort key: order 'param' by its numeric value, leave other columns as they are."""
    if column.name != 'param':
        return column
    return column.map(lambda p: float(p.replace('%', '')))


# Build the long-format frame and sort it by city, type, then numeric alpha.
df_result = (
    create_data_dict(df, city_params, types=selected_types)
    .sort_values(['city', 'type', 'param'], key=_param_as_number)
)

df_result

Unnamed: 0,city,type,t_param,param,badbids_share,rides_at_risk
5,4180,auto_econom,4.24,0%,0.663652,0.578085
6,4180,auto_econom,4.24,5%,0.513415,0.388148
7,4180,auto_econom,4.24,10%,0.411824,0.282108
8,4180,auto_econom,4.24,12.50%,0.352196,0.227078
9,4180,auto_econom,4.24,15%,0.300787,0.183466
20,4196,auto_econom,5.12,0%,0.796652,0.732646
21,4196,auto_econom,5.12,5%,0.697589,0.589529
22,4196,auto_econom,5.12,10%,0.599567,0.455023
23,4196,auto_econom,5.12,12.50%,0.546976,0.392074
24,4196,auto_econom,5.12,15%,0.502219,0.341632


In [14]:
# Plotting frame built from the long-format result.
df_plot = pd.DataFrame(df_result)

# Scatter of bad-bids share against rides at risk, one colour (trace) per city.
# The hover columns go through `custom_data` so Plotly Express splits them per
# trace together with x/y. Assigning one full-length `customdata` array to
# every trace via update_traces only lines up by coincidence of row order.
fig = px.scatter(df_plot, x="badbids_share", y="rides_at_risk", color="city",
                 custom_data=["city", "param"],
                 title="Bad Bids Share vs Rides at Risk",
                 labels={"param": "Alpha Parameter", "badbids_share": "Bad Bids Share",
                         "rides_at_risk": "Rides at Risk", "city": "City ID"})

# Hover text: customdata[0] is the city, customdata[1] the alpha label.
# (`%{color}` is not a hover variable of scatter traces.)
fig.update_traces(hovertemplate="<b>City:</b> %{customdata[0]}<br>" +
                                "<b>Bad Bids Share:</b> %{x}<br>" +
                                "<b>Rides at Risk:</b> %{y}<br>" +
                                "<b>Alpha Parameter:</b> %{customdata[1]}")

fig.show()

In [48]:
# Load the leaderboard export that supplies the background ("other") points.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory.
df_new = (
    pd.read_csv("//Users/georgiinusuev/PycharmProjects/work/badbids/exp_cities/ME Bidding Leaderboard Feb 26 2025 t_param 4.csv")
    .dropna()
    .loc[lambda d: d['rides_cnt'] >= 300000]
    # Drops every row in which ANY column equals 0 or 1 -- presumably meant to
    # remove degenerate share values; TODO confirm it should span all columns.
    .loc[lambda d: ~d.isin([0, 1]).any(axis=1)]
    .loc[lambda d: d['type_name'] == 'auto_econom']
    # Carry the type along so the legend label below is "other_auto_econom"
    # rather than "other_nan".
    .assign(city='other', param='0%', type=lambda d: d['type_name'])
    .loc[:, ['city', 'type', 'param', 'badbids_share', 'rides_at_risk']]
    .reset_index(drop=True)
)

# Experiment points first, leaderboard points after them.
df_combined = pd.concat([df_plot, df_new], ignore_index=True).assign(
    city_type=lambda d: d['city'].astype(str) + '_' + d['type'].astype(str)
)

# One trace per city/type pair. `custom_data` makes Plotly Express split the
# hover columns per trace; a single full-length customdata array pushed to
# every trace would show the wrong alpha for most points.
fig = px.scatter(df_combined, x="badbids_share", y="rides_at_risk", color="city_type",
                 custom_data=["city", "param"],
                 title="Bad Bids Share vs Rides at Risk",
                 labels={"param": "Alpha Parameter", "badbids_share": "Bad Bids Share",
                         "rides_at_risk": "Rides at Risk", "city_type": "City and Type"},
                 opacity=1.0)

# Hover text: customdata[1] is the alpha label of the hovered point.
fig.update_traces(hovertemplate="<b>Bad Bids Share:</b> %{x}<br>" +
                                "<b>Rides at Risk:</b> %{y}<br>" +
                                "<b>Alpha Parameter:</b> %{customdata[1]}")

# Reference line y = k * x through the origin and the point (x1, y1).
# It is a straight line through one point, not a fitted regression.
# NOTE(review): (x1, y1) are hard-coded and look like the city 4180 / 10%
# point from an earlier run -- confirm they still match the current data.
x1, y1 = 0.414221, 0.283353
k = y1 / x1  # slope

# Sample the line from 0 up to the largest x present in the plot.
x_line = np.linspace(0, df_combined["badbids_share"].max(), 100)
y_line = k * x_line

fig.add_trace(
    go.Scatter(
        x=x_line, y=y_line,
        mode="lines",
        name="Regression Line",
        line=dict(color="black", width=2, dash="dash")
    )
)

fig.show()


In [49]:
# (city, alpha, type) combinations to outline on the chart.
highlight_specs = [
    ('4515', '12.50%', 'auto_econom'),  # Puerto Vallarta
    ('4180', '10%', 'auto_econom'),     # Veracruz
    ('4196', '15%', 'auto_econom'),     # Cucuta
    ('4241', '12.50%', 'auto_econom'),  # Tepic
    ('4373', '15%', 'moto_econom'),
]

# Boolean row mask: True where a row matches any of the combinations above.
conditions = pd.Series(False, index=df_plot.index, dtype=bool)
for spec_city, spec_param, spec_type in highlight_specs:
    conditions = conditions | (
        (df_plot['city'] == spec_city)
        & (df_plot['param'] == spec_param)
        & (df_plot['type'] == spec_type)
    )

df_highlight = df_plot[conditions]

# Hollow red squares drawn on top of the selected points.
highlight_marker = dict(
    symbol="square",
    size=12,
    color="rgba(0,0,0,0)",  # transparent fill
    line=dict(width=2, color="red"),  # red outline
)
highlight_trace = go.Scatter(
    x=df_highlight["badbids_share"],
    y=df_highlight["rides_at_risk"],
    mode="markers",
    marker=highlight_marker,
    name="Highlighted Points",
)
fig.add_trace(highlight_trace)

# Figure size in pixels.
fig.update_layout(height=800, width=1200)

fig.show()

In [27]:
# Inspect the row whose index label is 14 (label-based lookup, not positional).
df_result.loc[14, :]

city                    4373
type             moto_econom
t_param                  6.1
param                    15%
badbids_share         0.5079
rides_at_risk        0.38282
Name: 14, dtype: object

In [137]:
# Highlighted points ordered by ascending bad-bids share.
df_highlight.sort_values("badbids_share")

Unnamed: 0,city,param,badbids_share,rides_at_risk
14,5548,15%,0.349761,0.255538
7,4180,10%,0.414221,0.283353
23,4241,12.50%,0.418264,0.281517
3,4515,12.50%,0.464603,0.325644
19,4196,15%,0.504859,0.343482


In [128]:
# Preview the first 25 rows of the combined frame.
df_combined.head(25)

Unnamed: 0,city,param,badbids_share,rides_at_risk
0,4515,0%,0.726504,0.634496
1,4515,5%,0.615343,0.482935
2,4515,10%,0.514078,0.373735
3,4515,12.50%,0.464603,0.325644
4,4515,15%,0.422331,0.28974
5,4180,0%,0.665002,0.5781
6,4180,5%,0.515468,0.388935
7,4180,10%,0.414221,0.283353
8,4180,12.50%,0.354519,0.228205
9,4180,15%,0.303058,0.184596
