# Library & INFO

In [49]:
#//LIBRARIES
import math
import numpy as np
import pandas as pd

import sys
import os 
sys.path.append(os.path.expanduser('~'))

from analysts_tools.growth import *

#Procurement tools
from procurement_lib import send_slack_notification,GoogleSheet,redash
from analystcommunity.read_connection_data_warehouse import run_read_dwd_query

from datetime import datetime, timedelta

In [50]:
# Site code of the city this pricing run targets.
city = 'SPO'

# Today's date as a midnight pandas Timestamp: formatting to 'YYYY-MM-DD' first
# discards the time of day before parsing it back.
_today_iso = datetime.today().strftime('%Y-%m-%d')
todays_date = pd.to_datetime(_today_iso, format='%Y-%m-%d')

In [51]:
# Open the pricing workbook and read the three manual-input tabs used below:
#   'SCR. ATDO' -> df_scrapper   (source_ids allowed for the 'Atacadao_V2' feed)
#   'ASSAI'     -> df_assai_sht  (source_ids whose benchmark search starts at 'Med Assai')
#   'FIX INDEX' -> df_fix_prov   (date-bounded Index overrides)
df_sheet = GoogleSheet("1P8vmNi-_t7BL0i7jjX2fpfnOuKYqqtrBTmqo3Qr3cfg")
df_scrapper, df_assai_sht, df_fix_prov = (
    df_sheet.get_as_dataframe(tab_name)
    for tab_name in ('SCR. ATDO', 'ASSAI', 'FIX INDEX')
)

In [52]:
# Descriptive information for the products that are switched on in the storefront ("PAGINA").
# Result columns: city, cat, subcat, cluster, source_id, description (from the `skus` CTE)
# plus penet, cant, gmv_usd and gmv_mix (from the `sales` CTE, orders submitted in the last 14 days).
# SKUs without a row in the `cluster` CTE are labelled 'TAIL'; SKUs without sales keep NULL metrics (LEFT JOIN).
# NOTE(review): gmv_usd divides local GMV by a hard-coded 4.75 -- presumably a BRL/USD rate; confirm it is current.
query = """
WITH 
cluster AS (
SELECT 
    sup.source_id,
    type AS cluster,
    ROW_NUMBER() OVER (PARTITION BY sup.source_id ORDER BY c.last_modified_at DESC, cluster DESC) AS rn
FROM dpr_product_pricing.dim_sku_cluster_period c
INNER JOIN dpr_shared.dim_stock_unit        su  ON su.sku = c.sku
INNER JOIN dpr_shared.dim_stock_unit        sup  ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id

WHERE c.site_id in (4,6,9,11)
 AND su.active = 1
 AND su.archived = 0
 AND su.in_catalog = 1
 ),

skus AS (
SELECT
    s.identifier_value AS city,
        CASE WHEN cat.parent_description = 'Mercearia' 
         AND cat.description NOT IN ('Arroz', 'Açúcar, adoçantes e doces','Açúcar e adoçantes','Feijão','Grãos','Farinhas e misturas','Azeites, óleos e vinagres')
         THEN 'Despensa'
        ELSE cat.parent_description 
    END AS cat,
    cat.description AS subcat,
    COALESCE(clt.cluster,'TAIL') AS cluster,
    sup.source_id,
    sup.description
FROM dpr_shared.dim_stock_unit          su
INNER JOIN dpr_shared.dim_stock_unit    sup ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id AND su.active = 1 AND su.archived = 0 AND su.in_catalog = 1
INNER JOIN dpr_shared.dim_site          s   ON s.site_id = sup.site_id
INNER JOIN dpr_shared.dim_category      cat ON cat.category_id = sup.category_id AND cat.super_category = 'Multicategoría'
LEFT JOIN cluster                       clt ON clt.source_id = sup.source_id AND clt.rn = 1
WHERE city IN ('SPO','CWB','BHZ','VCP')
),

penetracion AS (
SELECT
s.identifier_value AS region,
COUNT(DISTINCT fs.dim_customer) AS total_custom 
FROM dpr_sales.fact_sales                   fs
INNER JOIN dpr_shared.dim_site              s   ON s.site_id = fs.dim_site
INNER JOIN dpr_shared.dim_product           dp  ON dp.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_category          cat ON cat.category_id = dp.category_id

WHERE 
    fs.gmv_enabled = TRUE
    AND cat.super_category = 'Multicategoría'
    AND fulfillment_order_status NOT IN ('CANCELLED', 'ARCHIVED','No value')
    AND fs.fb_order_status_id IN (1,6,7,8)
    AND fs.is_deleted = FALSE
    AND fs.dim_status = 1
    AND dp.is_slot = 'false'
    AND fs.gmv_pxq_local > 0
    AND s.identifier_value IN ('SPO','CWB','VCP','BHZ')
    AND DATE(fs.order_submitted_date) >= CURRENT_DATE - 14
GROUP BY s.identifier_value
),

sales AS (
SELECT
    --DATE(fs.order_submitted_date) AS fecha,
    sup.source_id,
    100.00*COUNT(DISTINCT fs.dim_customer)::FLOAT/AVG(p.total_custom) AS penet,
    SUM(fs.product_quantity_x_step_unit) AS cant,
    SUM(fs.gmv_pxq_local)/4.75 AS gmv_usd,
    --SUM(COALESCE(fsd.product_discount,0))/4.75 AS dct_usd,
    --dct_usd/gmv_usd AS per_dct,
    --AVG(COALESCE(inventory_p_fin,cogs_p_mtd)) AS costo,
    -- gmv_usd*margin/100.00 AS cash_margin,
    -- cash_margin-dct_usd AS net_cash_margin,
    -- 100.00*net_cash_margin/gmv_usd AS net_margin,
    100.00*gmv_usd/SUM(gmv_usd) OVER (PARTITION BY s.identifier_value) AS gmv_mix--,
    -- 100.00*SUM(fs.product_price*fs.product_quantity_x_step_unit)/SUM(min_price*fs.product_quantity_x_step_unit) AS gpi,
    -- 100.00*SUM(fs.product_price_discount*fs.product_quantity_x_step_unit)/SUM(min_price*fs.product_quantity_x_step_unit) AS npi

    
FROM dpr_sales.fact_sales                   fs
--INNER JOIN dpr_shared.dim_customer          dc  ON dc.customer_id = fs.dim_customer
INNER JOIN dpr_shared.dim_site              s   ON s.site_id = fs.dim_site
INNER JOIN dpr_shared.dim_product           dp  ON dp.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_category          cat ON cat.category_id = dp.category_id
--LEFT JOIN dpr_sales.fact_sales_discounts    fsd ON fs.order_item_id = fsd.order_item_id 
INNER JOIN dpr_shared.dim_stock_unit        su  ON su.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_stock_unit        sup ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id
INNER JOIN penetracion                      p   ON p.region = s.identifier_value
--LEFT JOIN dpr_cross_business.fact_cross_business_insights m ON m.dim_stock_unit = sup.stock_unit_id AND m.dim_date = fs.dim_submitted_date AND DATE(fs.order_submitted_date) = DATE(current_date)
--LEFT JOIN dpr_product_pricing.obt_benchmark_product_prices  pb  ON pb.stock_unit_id = sup.stock_unit_id AND DATE(fs.order_submitted_date) = pb.benchmark_date

WHERE 
    fs.gmv_enabled = TRUE
    AND cat.super_category = 'Multicategoría'
    AND fulfillment_order_status NOT IN ('CANCELLED', 'ARCHIVED','No value')
    AND fs.fb_order_status_id IN (1,6,7,8)
    AND fs.is_deleted = FALSE
    AND fs.dim_status = 1
    AND dp.is_slot = 'false'
    AND fs.gmv_pxq_local > 0
    --AND (cogs_p_mtd > 0 OR inventory_p_fin > 0)
    AND s.identifier_value IN ('SPO','CWB','VCP','BHZ')
    AND DATE(fs.order_submitted_date) >= CURRENT_DATE - 14
GROUP BY 1,s.identifier_value
)

SELECT 
    skus.*,
    penet::FLOAT,
    cant::FLOAT,
    gmv_usd::FLOAT,
    gmv_mix::FLOAT
FROM skus
LEFT JOIN sales ON skus.source_id = sales.source_id"""
# Run the query against the data warehouse; dfq1 is filtered to `city` in a later cell.
dfq1 = run_read_dwd_query(query)

In [53]:
# Current price and cost information for the products that are switched on in the storefront.
# Result columns: source_id, price (min_gross_price), net_price (min_pricing_price), cost,
# mg and nmg (computed as 100 * (1 - cost / price), falling back to the stored margins) and dct.
# cost uses dtd_cost_local, or inventory_p_fin when dtd_cost_local is 0; when that result is NULL
# it falls back to the cost implied by gross_margin and min_gross_price.
# NOTE(review): the result has no city column and covers sites 4, 6, 9 and 11; it is later joined
# on source_id only -- confirm source_id is unique across sites.
query = """
SELECT 
sup.source_id,
--sup.description,
--p.min_base_price,
p.min_gross_price::FLOAT AS price,
p.min_pricing_price::FLOAT AS net_price,
--p.min_sale_price,
COALESCE(CASE WHEN dtd_cost_local = 0 THEN inventory_p_fin ELSE dtd_cost_local END,(1-gross_margin/100)*min_gross_price)::FLOAT AS cost,
-- ((1-gross_margin/100)*min_gross_price)::FLOAT AS cost,
COALESCE(100.00*(1-(cost/price)),gross_margin)::FLOAT AS mg,
COALESCE(100.00*(1-(cost/net_price)),net_pricing_margin)::FLOAT AS nmg,
discount_pricing_value::FLOAT AS dct

FROM dpr_product_pricing.dim_product_current_price p
INNER JOIN dpr_shared.dim_stock_unit        su  ON su.stock_unit_id = p.stock_unit_id
INNER JOIN dpr_shared.dim_stock_unit        sup ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id
INNER JOIN dpr_shared.dim_category          cat ON cat.category_id = sup.category_id AND cat.super_category = 'Multicategoría'
LEFT JOIN  dpr_cross_business.fact_cross_business_insights m ON m.dim_stock_unit = sup.stock_unit_id AND m.dim_date = TO_CHAR(current_date,'YYYYMMDD')::INT
LEFT JOIN  dpr_cross_business.int_dtd_cost dt ON dt.dim_stock_unit = sup.stock_unit_id AND dt.dim_date_dtd = TO_CHAR(current_date,'YYYYMMDD')::INT
WHERE p.site_id in (4,6,9,11)
 AND su.active = 1
 AND su.archived = 0
 AND su.in_catalog = 1
 """
# Run the query against the data warehouse.
dfq2 = run_read_dwd_query(query)

In [54]:
# # Info Offers Dct. Proveedor
# query = """
# SELECT
#   s.identifier_value AS city,
#   dof.source_id AS offer_id,
#   dof.description AS offer_name,
#   dot.description AS offer_type,
# --   fso.date_created AS date_created,
# --   fso.date_updated AS date_updated,
# --   duc."name" AS user_creator,
# --   duu.name AS user_updater,
#   fso.start_date AS start_date,
#   fso.end_date AS end_date,
#   fso.discount,
#   fso.max_uses_per_order,
#   fso.max_uses_per_customer,
#   fso.use_segment_to_exclude,
#   fso.customer_segment_id,
#   cat.parent_description AS cat,
#   cat.description AS subcat,
#   su.source_id,
#   su.card_Description AS product
    
# FROM dpr_sales.fact_sales_offers        fso -- Shared dimensions
# INNER JOIN dpr_shared.dim_date              ON dim_date.date_id = fso.dim_date_created
# INNER JOIN dpr_shared.dim_site          s   ON s.site_id = fso.dim_site
# --INNER JOIN dpr_shared.dim_user_admin duc ON duc.user_admin_id = fso.dim_user_creator
# --INNER JOIN dpr_shared.dim_user_admin duu ON duu.user_admin_id = fso.dim_user_updater -- Model dimensions
# INNER JOIN dpr_sales.dim_offer          dof ON dof.offer_id = fso.dim_offer
# INNER JOIN dpr_sales.dim_offer_type     dot ON dot.offer_type_id = fso.dim_offer_type
# INNER JOIN dpr_shared.dim_stock_unit    su  ON fso.dim_stock_unit = su.stock_unit_id
# INNER JOIN dpr_shared.dim_category      cat ON su.category_id = cat.category_id
# --left join dpr_sales.dim_customers_segments dcs ON dcs.customer_segment_id = fso.customer_segment_id -- Historical changes

# WHERE fso.automatically_added = 1
#  AND fso.start_date <= current_date
#  AND fso.end_date > current_date
#  AND dof.description ILIKE 'ACMKTPLC%'
#  """
# df_prov = run_read_dwd_query(query)

In [55]:
# Competitor price quotations collected during the last 10 days for site 'SPO'.
# Competitors whose name contains 'cayena' are excluded, and the 'Atacadao_V2'
# feed is only kept for the source_ids listed in the 'SCR. ATDO' sheet.

# Build the SQL IN-list explicitly from clean integers. Formatting the repr of a
# tuple of sheet values is fragile: blank sheet rows become `nan`, float ids
# render as `123.0`, and NumPy >= 2 renders scalars as `np.int64(123)` -- all of
# which produce invalid SQL.
_scrapper_ids = (
    pd.to_numeric(df_scrapper['source_id'], errors='coerce')
    .dropna()
    .astype('int64')
    .unique()
    .tolist()
)
# The trailing sentinel id 1 is kept from the original implementation so that the
# list is never empty and the generated SQL stays the same for valid inputs.
_skus_scrapper_sql = '(' + ', '.join(str(sku_id) for sku_id in _scrapper_ids + [1]) + ')'

query = """
select
    competitor.competitor_name AS competitor_name,   
    site.identifier_value as site_code,
    quotation_date.full_date AS quotation_date,
    su.source_id,
    ROUND(cpp.product_selected_price,2)::float as price--ROUND(MEDIAN(cpp.product_selected_price),2)::float as price
from dpr_product_pricing.fact_collected_product_prices cpp
    inner join dpr_shared.dim_date quotation_date
        on cpp.dim_quotation_date = quotation_date.date_id
    inner join dpr_shared.dim_time quotation_time
        on cpp.dim_quotation_time = quotation_time.time_id
    inner join dpr_shared.dim_site site
        on cpp.dim_site = site.site_id
    inner join dpr_shared.dim_category cat
        on cpp.dim_category = cat.category_id
    inner join dpr_product_pricing.dim_product_outlier_type outlier_type
        on cpp.dim_outlier_type = outlier_type.outlier_type_id
    inner join dpr_product_pricing.dim_product_source_type source_type
        on cpp.dim_source_type = source_type.source_type_id
    inner join dpr_product_pricing.dim_product_competitor competitor
        on cpp.dim_competitor = competitor.competitor_id
    inner join dpr_product_pricing.dim_product_competitor_type competitor_type
        on(
            case
                when cpp.super_category = 'Fruver'
                    then competitor.product_competitor_type_id_fruver = competitor_type.competitor_type_id
                when cpp.super_category = 'Multicategoría'
                    then competitor.product_competitor_type_id_multicategoria = competitor_type.competitor_type_id
            end
        )
    inner join dpr_shared.dim_stock_unit su
        on cpp.dim_stock_unit = su.stock_unit_id
where quotation_date.full_date >= current_date - 10
    AND (
        competitor.competitor_name NOT ILIKE '%cayena%'
        AND (
            competitor.competitor_name <> 'Atacadao_V2'
            OR su.source_id IN {skus_scrapper}
        )
    )
    AND site.identifier_value IN ('SPO')

--GROUP BY 1,2,3,4
""".format(skus_scrapper=_skus_scrapper_sql)
df_zkkkkk = run_read_dwd_query(query)

# Drop incomplete quotations and tag every real observation with a full lifetime
# of 8 (the carry-forward cell counts this down for the synthetic rows it adds).
df_zkkkkk = df_zkkkkk.dropna().reset_index(drop=True)
df_zkkkkk['lifetime'] = 8

In [56]:
# Keep only the quotations that belong to the configured city.
df_zkkkkk = df_zkkkkk[df_zkkkkk['site_code'] == city].reset_index(drop=True).copy()

In [57]:
def _build_carry_forward_rows(quotes, lifetime_days=8):
    """Build the synthetic rows that carry each observed price forward in time.

    For every (competitor_name, source_id) pair the observations are walked in
    date order. Days strictly between two observations repeat the earlier price,
    for at most ``lifetime_days`` days; after the last observation the price is
    repeated for ``lifetime_days`` more days. Each synthetic row stores the
    remaining lifetime (``lifetime_days`` minus the days since the observation).

    Args:
        quotes: DataFrame with columns 'site_code', 'quotation_date',
            'competitor_name', 'source_id' and 'price'. 'quotation_date' values
            must support subtraction yielding an object with ``.days``.
        lifetime_days: Number of days an observed price remains usable.

    Returns:
        list[dict]: one dict per synthetic row (empty when ``quotes`` is empty).
    """
    carried = []
    for (competitor, source_id), group in quotes.groupby(['competitor_name', 'source_id']):
        group = group.sort_values(by='quotation_date')
        previous_date = None
        previous_price = None
        site_code = None

        for site_code, quote_date, price in zip(group['site_code'], group['quotation_date'], group['price']):
            if previous_date is not None:
                gap_days = (quote_date - previous_date).days
                # range() is empty for consecutive or same-day observations and
                # stops one day before the next real observation.
                for offset in range(1, min(gap_days, lifetime_days + 1)):
                    carried.append({
                        'site_code': site_code,
                        'quotation_date': previous_date + timedelta(days=offset),
                        'competitor_name': competitor,
                        'source_id': source_id,
                        'price': previous_price,
                        'lifetime': lifetime_days - offset,
                    })
            previous_date, previous_price = quote_date, price

        # After the last observation keep the price alive until its lifetime hits 0.
        for offset in range(1, lifetime_days + 1):
            carried.append({
                'site_code': site_code,
                'quotation_date': previous_date + timedelta(days=offset),
                'competitor_name': competitor,
                'source_id': source_id,
                'price': previous_price,
                'lifetime': lifetime_days - offset,
            })
    return carried


# Ensure dataframe is sorted by 'quotation_date'
df_zkkkkk = df_zkkkkk.sort_values(by='quotation_date')

# Generate the required rows for missing dates.
# NOTE: this cell overwrites df_zkkkkk, so re-running it without re-running the
# query cell treats the synthetic rows as observations and extends them again.
new_rows = _build_carry_forward_rows(df_zkkkkk, lifetime_days=8)

# Append new rows to the dataframe. DataFrame.append was removed in pandas 2.0;
# pd.concat gives the same result on every pandas version.
if new_rows:
    df_zkkkkk = pd.concat([df_zkkkkk, pd.DataFrame(new_rows)], ignore_index=True)
else:
    df_zkkkkk = df_zkkkkk.reset_index(drop=True)

In [58]:
# Join the catalog/sales info of the configured city with current prices and costs
# (only SKUs present in both), then order rows by category (descending),
# sub-category, cluster and GMV mix (descending).
_city_catalog = dfq1.loc[dfq1.city == city]
df = (
    _city_catalog
    .merge(dfq2, on='source_id', how='inner')
    .sort_values(by=['cat', 'subcat', 'cluster', 'gmv_mix'], ascending=[False, True, True, False])
    .reset_index(drop=True)
)

In [59]:
# Benchmark rows (observed or carried forward) whose quotation date is today.
_is_today = df_zkkkkk['quotation_date'] == todays_date.date()
df_bench = df_zkkkkk[_is_today].reset_index(drop=True).copy()

def calculate_statistics(df):
    """Summarise benchmark prices per SKU.

    Args:
        df: DataFrame with columns 'source_id', 'competitor_name' and 'price'.

    Returns:
        DataFrame with one row per source_id and the columns 'source_id',
        'num_data_points' (non-null prices), 'num_competitors' (distinct
        competitor names), 'min_price', 'avg_price', 'median_price', 'max_price'.
    """
    per_sku = df.groupby('source_id')
    summary = per_sku.agg(
        num_data_points=('price', 'count'),
        num_competitors=('competitor_name', 'nunique'),
        min_price=('price', 'min'),
        avg_price=('price', 'mean'),
        median_price=('price', 'median'),
        max_price=('price', 'max'),
    )
    return summary.reset_index()

_competitor_name = df_bench['competitor_name']
# Exact (case-insensitive) match of the scraper feed name.
_is_scrapper = _competitor_name.str.contains(r'(?i)^Atacadao_V2$')

# Statistics across every competitor.
stats_all = calculate_statistics(df_bench)
stats_all.columns = ['source_id', 'num_data_points_all', 'num_competitors_all', 'min_price_all', 'avg_price_all', 'Med ALL', 'max_price_all']

# Competitors whose name contains 'assaí' (case-insensitive).
# NOTE(review): the pattern carries the accent, so a name spelled 'Assai' would not match -- confirm naming.
df_assai = df_bench[_competitor_name.str.contains('assaí', case=False, na=False)]
stats_assai = calculate_statistics(df_assai)
stats_assai.columns = ['source_id', 'num_data_points_assai', 'num_competitors_assai', 'min_price_assai', 'avg_price_assai', 'Med Assai', 'max_price_assai']

# Competitors whose name starts with 'atacadao'/'atacadão', excluding the scraper feed.
df_atacadao = df_bench[_competitor_name.str.match(r'(?i)^atacad[aã]o') & ~_is_scrapper]
stats_atacadao = calculate_statistics(df_atacadao)
stats_atacadao.columns = ['source_id', 'num_data_points_atacadao', 'num_competitors_atacadao', 'min_price_atacadao', 'avg_price_atacadao', 'Med Atacadao', 'max_price_atacadao']

# The scraper feed on its own; its minimum price is exposed as 'Scrp. Atacadao'.
df_scrapper_atacadao = df_bench[_is_scrapper]
stats_scrapper_atacadao = calculate_statistics(df_scrapper_atacadao)
stats_scrapper_atacadao.columns = ['source_id', 'num_data_points_atacadao_scrapper', 'num_competitors_atacadao_scrapper', 'Scrp. Atacadao', 'avg_price_atacadao_scrapper', 'Med atacadao_scrapper', 'max_price_atacadao_scrapper']

# Left-join every group onto the all-competitor stats (one row per source_id).
bench_df = stats_all
for _group_stats in (stats_assai, stats_atacadao, stats_scrapper_atacadao):
    bench_df = bench_df.merge(_group_stats, on='source_id', how='left')

# PRINTS

## PRINT DESCRIPTIVE INFO

In [60]:
# Read the manually maintained strategy parameters from the 'info_to_py' tab.
# Later cells read the columns 'ID', 'Strategy', 'Tipo producto', 'Index',
# 'Frecuency' and 'Min margin' from it.
df_info_sheet = df_sheet.get_as_dataframe('info_to_py')

In [61]:
# Percentage columns that are converted to fractions (value / 100).
columns_to_divide = ['penet', 'gmv_mix', 'mg', 'nmg', 'dct']

# Work on a copy of df with the percentage columns rescaled.
df_print = df.assign(**{col: df[col] / 100 for col in columns_to_divide})

# Attach the strategy parameters kept in the sheet (matched on source_id == ID)
# so they stay constant between runs; the redundant 'ID' key is dropped.
df_print = (
    df_print
    .merge(df_info_sheet, left_on='source_id', right_on='ID', how='left')
    .drop(columns=['ID'])
)

In [62]:
# Write the descriptive info to the 'info' worksheet.
# NOTE(review): presumably clear=True wipes the tab first and autocreate=True creates it if missing -- confirm in GoogleSheet.
df_sheet.set_with_dataframe(df_print, worksheet='info', clear=True, autocreate=True)

## PRINT BENCH INFO

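Acá dejamos solo las primeras 19 columnas de `bench_df` (`source_id` más 6 estadísticas para cada uno de los grupos ALL, Assai y Atacadao) porque no queremos incluir las columnas del scrapper de Atacadao (`Atacadao_V2`).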

In [63]:
# Write the benchmark statistics to the 'data' worksheet.
# Only the first 19 columns are sent: source_id plus the 6 statistics of the
# ALL, Assai and Atacadao groups; the scraper columns that follow are left out.
df_sheet.set_with_dataframe(bench_df.iloc[:, :19], worksheet='data', clear=True, autocreate=True)

## PRINT MIN BENCH INFO

In [64]:
# For every source_id locate the row holding the lowest benchmark price today
# and keep which competitor offers it.
min_price_idx = df_bench.groupby('source_id')['price'].idxmin()
_min_bench_columns = ['source_id', 'competitor_name', 'price']
df_bench_min = df_bench.loc[min_price_idx, _min_bench_columns]

In [65]:
# Write the cheapest competitor per SKU to the 'min_bench' worksheet.
df_sheet.set_with_dataframe(df_bench_min, worksheet='min_bench', clear=True, autocreate=True)

## APPLYING RULES

### APLICAR ESTRATEGIA

##### 1. NEW OR NULL VALUES

In [66]:
# Step 1: rows without a 'Strategy' are new SKUs (or SKUs with missing parameters).
na_rows = df_print[df_print['Strategy'].isna()]
_new_idx = na_rows.index

# Step 2: fill the missing parameters of those rows with the default values.
_defaults = {
    'Tipo producto': 'Mais barato',
    'Index': 1,
    'Frecuency': 'Diaria',
    'Strategy': 'Med Atacadao',
}
for _column, _default in _defaults.items():
    df_print.loc[_new_idx, _column] = df_print.loc[_new_idx, _column].fillna(_default)

# Step 3: a missing 'Min margin' takes the average 'Min margin' of its sub-category.
avg_margin_by_subcat = df_print.groupby('subcat')['Min margin'].transform('mean')
df_print.loc[_new_idx, 'Min margin'] = df_print.loc[_new_idx, 'Min margin'].fillna(avg_margin_by_subcat)

In [67]:
# Working frame for the pricing strategy: every row that has a 'Strategy'.
df_stg = df_print[df_print['Strategy'].notna()].reset_index(drop=True).copy()

### 1.5 Add Indexes Dct. Proveedor

In [68]:
_today_iso = str(todays_date.date())
_yesterday = todays_date + timedelta(days=-1)

# Overrides to apply today: fecha_inicio <= today < fecha_fin.
# NOTE(review): the bounds are compared as strings, which assumes the sheet stores 'YYYY-MM-DD' text -- confirm.
_has_started = df_fix_prov['fecha_inicio'] <= _today_iso
_has_not_ended = df_fix_prov['fecha_fin'] > _today_iso
df_fix_prov_2 = df_fix_prov.loc[_has_started & _has_not_ended]

# Overrides to roll back: those whose fecha_fin was yesterday.
# NOTE(review): on the day equal to fecha_fin a row is in neither selection -- confirm this gap is intended.
df_fix_prov_3 = df_fix_prov.loc[pd.to_datetime(df_fix_prov['fecha_fin']) == _yesterday]

In [69]:
# source_id -> 'INDEX ACTUAL': the Index to restore for overrides that just expired.
_expired_overrides = df_fix_prov_3.set_index('source_id')
fix_index_dict_back = _expired_overrides['INDEX ACTUAL'].to_dict()

# Restore the Index of the SKUs present in that mapping.
mask_prov_back = df_stg['source_id'].isin(list(fix_index_dict_back))
_restored_index = df_stg.loc[mask_prov_back, 'source_id'].map(fix_index_dict_back)
df_stg.loc[mask_prov_back, 'Index'] = _restored_index

In [70]:
# source_id -> 'INDEX FIJO': the Index to enforce while an override is active.
_active_overrides = df_fix_prov_2.set_index('source_id')
fix_index_dict = _active_overrides['INDEX FIJO'].to_dict()

# Enforce the fixed Index on the SKUs present in that mapping.
# mask_prov is reused later to label these rows as provider negotiations.
mask_prov = df_stg['source_id'].isin(list(fix_index_dict))
_fixed_index = df_stg.loc[mask_prov, 'source_id'].map(fix_index_dict)
df_stg.loc[mask_prov, 'Index'] = _fixed_index

#### 2. Logic to define the price

In [71]:
def formula(row):
    """Pick the benchmark and the resulting price for one SKU row.

    Reads the module-level frames ``bench_df`` (benchmark statistics per
    source_id) and ``df_assai_sht`` (source_ids whose search starts at 'Med Assai').

    Args:
        row: a row of df_stg providing 'source_id', 'Index', 'mg', 'Min margin',
            'net_price', 'cost' and 'dct'.

    Returns:
        tuple ``(strategy, new_bench, new_price, explanation)``:
        - no usable benchmark: ``('Margin', None, price, 'NO BENCH')`` where price
          is the current net price if 'mg' exceeds 'Min margin', otherwise the
          minimum-margin price;
        - benchmark keeps the margin above 'Min margin':
          ``(column, bench, bench, 'PRICED BENCH')``;
        - otherwise ``(column, bench, minimum-margin price, 'PRICED BY MIN MARGIN')``.
    """
    sku_id = row['source_id']
    index_factor = row['Index']

    def _min_margin_price():
        # Price that yields exactly 'Min margin' on cost, reduced by the discount.
        return round((row['cost'] / (1 - row['Min margin'])) * (1 - row['dct']), 2)

    # Benchmark columns in order of preference.
    strategies = ['Scrp. Atacadao', 'Med Atacadao', 'Med Assai', 'Med ALL']
    # SKUs listed in the ASSAI sheet skip both Atacadao benchmarks.
    first_candidate = 2 if sku_id in df_assai_sht.source_id.unique() else 0

    # First preferred benchmark column holding a value for this SKU.
    sku_bench = bench_df[bench_df['source_id'] == sku_id]
    chosen = None
    if not sku_bench.empty:
        for candidate in strategies[first_candidate:]:
            if pd.notna(sku_bench[candidate].values[0]):
                chosen = candidate
                break

    if chosen is None:
        if row['mg'] > row['Min margin']:
            return 'Margin', None, row['net_price'], 'NO BENCH'
        return 'Margin', None, _min_margin_price(), 'NO BENCH'

    # Scale the benchmark by the SKU's Index.
    bench_value = sku_bench[chosen].values[0] * index_factor
    # Margin if priced at the benchmark.
    # NOTE(review): the benchmark is grossed up with (1 + dct) while the fallback
    # nets down with (1 - dct) -- confirm the asymmetry is intended.
    new_margin = 1 - (row['cost'] / (bench_value * (1 + row['dct'])))

    rounded_bench = round(bench_value, 2)
    if new_margin > row['Min margin']:
        return chosen, rounded_bench, rounded_bench, 'PRICED BENCH'
    return chosen, rounded_bench, _min_margin_price(), 'PRICED BY MIN MARGIN'

# Apply formula() to every row and unpack its 4-tuple into four columns:
# 'Strategy' (overwritten with the benchmark column actually used, or 'Margin'),
# 'new_bench', 'new_price' and 'explanation'.
df_stg[['Strategy','new_bench', 'new_price','explanation']] = df_stg.apply(lambda row: pd.Series(formula(row)), axis=1)

#### 3. Logic to apply elasticities (B. Suaves)

In [72]:
# Read the 'elasticity' tab; later cells use its 'source_id' and 'mean_edpv' columns.
df_info_ryd = df_sheet.get_as_dataframe('elasticity')

In [73]:
# Attach the elasticity info to every priced SKU.
# NOTE(review): later cells copy df_bs columns back into df_stg by row index, which
# assumes this left join adds no rows (one elasticity row per source_id) -- confirm.
df_bs = df_stg.merge(df_info_ryd, on='source_id', how='left')

# Proposed price change versus the current net price, in percent.
df_bs['uplift'] = 100.00 * ((df_bs['new_price'] / df_bs['net_price']) - 1)

In [74]:
from scipy.stats import percentileofscore

def calculate_new_uplift(df):
    """Cap each SKU's price move according to its elasticity rank.

    Every SKU with a known 'mean_edpv' gets a maximum absolute move (in percent)
    between 4 and 10: the highest 'mean_edpv' is capped at 4 and lower values
    get progressively larger caps. The percentile is the average rank among the
    rows that HAVE a 'mean_edpv'. Rows without one get a NaN 'new_uplift' (the
    caller fills those by cluster) and do not affect the other rows' ranks --
    scipy's percentileofscore returns NaN for every row as soon as the column
    contains a single NaN (default nan_policy='propagate').

    Args:
        df: DataFrame with the columns 'mean_edpv' and 'uplift' (percent).

    Returns:
        The same DataFrame with a 'new_uplift' column added: 'uplift' limited to
        [-cap, +cap].
    """
    # Average-rank percentile in (0, 1], computed over non-null values only.
    percentiles = df['mean_edpv'].rank(pct=True, method='average')

    # Apply the transformation (1 - percentile) and map it to the range [4, 10]
    mapped_values = 4 + (1 - percentiles) * (10 - 4)

    # Limit the move while keeping the sign of the original uplift
    df['new_uplift'] = np.where(df['uplift'] >= 0, np.minimum(mapped_values, df['uplift']),
                                np.maximum(-mapped_values, df['uplift']))

    return df

In [75]:
# Apply the calculation for each city separately.
# group_keys=False keeps the original row index: from pandas 2.0 on, the default
# prepends the city to the index, which would break the index-aligned copy of
# df_bs columns back into df_stg in the next cell.
df_bs = df_bs.groupby('city', group_keys=False).apply(calculate_new_uplift)

# SKUs without elasticity (NaN mean_edpv) are capped by cluster instead:
# KVI -> +/-4 %, MID -> +/-7 %, any other cluster -> +/-10 %.
df_bs_na = df_bs[df_bs['mean_edpv'].isna()].copy()
_cluster_cap = df_bs_na['cluster'].map({'KVI': 4, 'MID': 7}).fillna(10)

# Limit the move while keeping the sign of the original uplift
df_bs_na['new_uplift'] = np.where(df_bs_na['uplift'] >= 0,
                                  np.minimum(_cluster_cap, df_bs_na['uplift']),
                                  np.maximum(-_cluster_cap, df_bs_na['uplift']))

# Update the original DataFrame with new_uplift for NaN mean_edpv
df_bs.loc[df_bs_na.index, 'new_uplift'] = df_bs_na['new_uplift']

# Calculate new_price from the capped move and round to 2 decimal places
df_bs['new_price'] = (df_bs.net_price * (1 + df_bs.new_uplift / 100)).round(2)

In [76]:
# Rows whose move was capped ("bajadas suaves") are relabelled, unless they were
# already priced by minimum margin.
_was_capped = df_bs['uplift'] != df_bs['new_uplift']
_not_min_margin = df_bs['explanation'] != "PRICED BY MIN MARGIN"
df_bs['explanation'] = df_bs['explanation'].mask(_was_capped & _not_min_margin, "B. SUAVES")

# Copy the capped price and its explanation back to df_stg (aligned on row index).
df_stg['new_price'] = df_bs['new_price']
df_stg['explanation'] = df_bs['explanation']

# Refresh frequency: KVI SKUs daily, every other cluster weekly.
df_stg['Frecuency'] = df_stg['cluster'].eq('KVI').map({True: 'Diaria', False: 'Semanal'})

## Lógica para redondear los precios finales a terminación .x9

In [77]:
def round_to_nearest_09(value):
    """Round a price to the nearest amount whose cents end in 9.

    The fractional part is rounded to the nearest tenth and one cent is then
    subtracted, e.g. 5.43 -> 5.39, 5.47 -> 5.49, 5.02 -> 4.99, 5.97 -> 5.99.
    (Despite the name, the result is not a multiple of 0.09.)

    Args:
        value: price as a float; may be missing (None / NaN).

    Returns:
        The rounded price, or NaN when ``value`` is missing. Without this guard
        a NaN price raises ``ValueError`` inside ``round()``.
    """
    if pd.isna(value):
        return np.nan

    # Split into the integer part and the fractional part
    integer_part = np.floor(value)
    fractional_part = value - integer_part

    # Nearest tenth, minus one cent, so the cents end in 9
    rounded_fraction = round(fractional_part / 0.10) * 0.10 - 0.01

    # Combine the integer part with the rounded fractional part
    return np.round(integer_part + rounded_fraction, 2)

# Round every final price element-wise.
df_stg['new_price'] = df_stg['new_price'].map(round_to_nearest_09)

## Pegar EXPLANATION

In [78]:
# Manually fixed prices from the 'FIX PRICES' tab that are active today
# (fecha_inicio <= today < fecha_fin).
# NOTE(review): the bounds are compared as strings, which assumes 'YYYY-MM-DD' text in the sheet -- confirm.
_fix_today = str(todays_date.date())
df_fix = df_sheet.get_as_dataframe('FIX PRICES')
_fix_is_active = (df_fix['fecha_inicio'] <= _fix_today) & (df_fix['fecha_fin'] > _fix_today)
df_fix = df_fix.loc[_fix_is_active]

In [79]:
# source_id -> fixed price for the overrides active today.
fix_price_dict = df_fix.set_index('source_id')['precio_fijo'].to_dict()

# Rows with a fixed price take that price and are labelled accordingly.
mask = df_stg['source_id'].isin(list(fix_price_dict))
_fixed_prices = df_stg.loc[mask, 'source_id'].map(fix_price_dict)
df_stg.loc[mask, 'new_price'] = _fixed_prices
for _column, _label in (('Strategy', 'Fix Price'), ('explanation', 'Fix Price CatMan')):
    df_stg.loc[mask, _column] = _label

In [80]:
# Label the SKUs under a provider-discount negotiation (those with a fixed Index, mask_prov).
# NOTE(review): the explanation reuses 'Fix Price CatMan' -- confirm this label is intended here.
for _column, _label in (('Strategy', 'Negotiation Prov'), ('explanation', 'Fix Price CatMan')):
    df_stg.loc[mask_prov, _column] = _label

In [81]:
# Write source_id and explanation to the 'explanation' worksheet.
df_sheet.set_with_dataframe(df_stg.loc[:, ['source_id','explanation']], worksheet='explanation', clear=True, autocreate=True)

# Unused alternative: select columns 6 to 9 (positions 5 to 8)
# subset_columns = df_print.iloc[:, 5:9]

## Pegar INFO FINAL

In [82]:
# Write the last 8 columns of df_stg to the 'info_to_sheet' worksheet.
# NOTE(review): the selection is positional, so it depends on the column order of
# the 'info_to_py' tab plus the columns added in this notebook -- confirm it still
# matches what the sheet expects.
df_sheet.set_with_dataframe(df_stg.iloc[:, -8:], worksheet='info_to_sheet', clear=True, autocreate=True)

# Unused alternative: select columns 6 to 9 (positions 5 to 8)
# subset_columns = df_print.iloc[:, 5:9]

In [83]:
# =IFERROR(
#   IFS(
#     OR(
#       AND(AC9<>"",OR(AC9>=75%,AC9<=-30%,L9/J9<15%))
#     ), "OUTLIER",
    
#     OR(
#       OR(R9="NO TOCAR",U9="Fix Price")
#     ), "FIX/NO TOCAR",

#     OR(
#       AND(W9<>"", C9="KVI", ACS(AC9)<=2%, ACS(AG9)<=5%, AN9>=-15%),
#       AND(W9<>"", C9<>"KVI", ACS(AC9)<=5%),
#       AND(C9="TAIL"),
#       AND(W9="", C9="KVI", ACS(AG9)<=5%, AN9>=-15%),
#       AND(W9="", C9<>"KVI")
#     ), "OK",

#     OR(      
#       AND(W9<>"", ACS(AC9)>5%, ACS(AG9)>5%, AN9<-15%),
#       AND(W9="", C9="KVI", ACS(AG9)>5%, AN9<0%),
#       AND( C9<>"TAIL", AN9<-50%)
#     ), "Alert",
    
#     OR(
#       AND(W9<>"", C9="KVI",OR(ACS(AC9)>2%, ACS(AG9)>5%, AN9>=-100%)),
#       AND(W9<>"", C9="MID",OR(ACS(AC9)>5%)),
#       AND(W9="", C9="KVI")
#     ), "Review",
    
#     TRUE, "Check"  
#   ), 
#   "OK" 
# )