In [1]:
#//----------------------------
#//LIBRARIES
    #Math
import math
    #Numeric Python
import numpy as np
    #Pandas (dataframes)
import pandas as pd
    #datetime for date manipulation
from datetime import date, datetime, timedelta  
    #Regex for advanced string matching
import re
    #for time related stuff
import time 
    #json library
import json
    #Analyst tools
import sys
sys.path.append('../')
from analysts_tools.growth import *
    #Procurement tools
from analysts_tools.redash_methods import *
from analystcommunity.read_connection_data_warehouse import run_read_dwd_query, run_read_prod_query

import random
import datetime

import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

from procurement_lib import GoogleSheet

In [2]:
# Current local date as an ISO 'YYYY-MM-DD' string.
todays_date = datetime.datetime.today().date().isoformat()

In [3]:
# Competitor quotes for site SPO from 2024-05-01 onwards: one row per
# (competitor, site, quotation date, SKU source_id) carrying the rounded median of
# the collected prices. Competitors whose name contains "cayena" are excluded.
query = """
select
    competitor.competitor_name,
    site.identifier_value as site_code,
    quotation_date.full_date AS quotation_date,
    su.source_id,
    ROUND(MEDIAN(cpp.product_selected_price),2)::float as price
from dpr_product_pricing.fact_collected_product_prices cpp
    inner join dpr_shared.dim_date quotation_date
        on cpp.dim_quotation_date = quotation_date.date_id
    inner join dpr_shared.dim_time quotation_time
        on cpp.dim_quotation_time = quotation_time.time_id
    inner join dpr_shared.dim_site site
        on cpp.dim_site = site.site_id
    inner join dpr_shared.dim_category cat
        on cpp.dim_category = cat.category_id
    inner join dpr_product_pricing.dim_product_outlier_type outlier_type
        on cpp.dim_outlier_type = outlier_type.outlier_type_id
    inner join dpr_product_pricing.dim_product_source_type source_type
        on cpp.dim_source_type = source_type.source_type_id
    inner join dpr_product_pricing.dim_product_competitor competitor
        on cpp.dim_competitor = competitor.competitor_id
    inner join dpr_product_pricing.dim_product_competitor_type competitor_type
        on(
            case
                when cpp.super_category = 'Fruver'
                    then competitor.product_competitor_type_id_fruver = competitor_type.competitor_type_id
                when cpp.super_category = 'Multicategoría'
                    then competitor.product_competitor_type_id_multicategoria = competitor_type.competitor_type_id
            end
        )
    inner join dpr_shared.dim_stock_unit su
        on cpp.dim_stock_unit = su.stock_unit_id
where quotation_date.full_date >= '2024-05-01'
    --AND quotation_date.full_date <= '2023-12-22'
    AND competitor.competitor_name NOT ILIKE '%cayena%'
    AND site.identifier_value IN ('SPO')
    --AND (competitor.competitor_name ILIKE '%assaí%' OR competitor.competitor_name ILIKE '%atacadao%' OR competitor.competitor_name ILIKE '%atacadão%')
    --AND competitor.competitor_name <> 'Atacadao_V2'
GROUP BY 1,2,3,4
"""

# Fetch the quotes, discard rows with any missing value, renumber the index and
# give every observed quote the starting `lifetime` of 8 that the gap-filling
# cell counts down from.
df_zkkkkk = (
    run_read_dwd_query(query)
    .dropna()
    .reset_index(drop=True)
    .assign(lifetime=8)
)

In [4]:
# Carry every observed competitor price forward over the days that have no quote,
# for at most PRICE_LIFETIME_DAYS days after the observation.
PRICE_LIFETIME_DAYS = 8


def _carry_forward_rows(observed, lifetime):
    """Build the synthetic rows that fill the date gaps of each price series.

    Parameters
    ----------
    observed : pandas.DataFrame
        Observed quotes with columns ``competitor_name``, ``source_id``,
        ``site_code``, ``quotation_date`` and ``price``. Rows must already be
        sorted by ``quotation_date``; ``groupby`` keeps that order inside each
        (competitor, source_id) group.
    lifetime : int
        Maximum number of days a quote is repeated after its observation date.

    Returns
    -------
    list of dict
        One dict per generated day. ``lifetime`` in each dict is the number of
        days the repeated quote has left (``lifetime - days_since_observation``).
    """
    filled = []
    for (competitor, source_id), series in observed.groupby(['competitor_name', 'source_id']):
        # Plain lists avoid a per-row `iloc` lookup, which is very slow on large frames.
        dates = series['quotation_date'].tolist()
        prices = series['price'].tolist()
        sites = series['site_code'].tolist()

        # Gaps between two consecutive observations: repeat the earlier price on
        # the missing days, stopping before the next observation or when the
        # lifetime is used up, whichever comes first.
        for idx in range(1, len(dates)):
            gap_days = (dates[idx] - dates[idx - 1]).days
            for offset in range(1, min(gap_days, lifetime + 1)):
                filled.append({
                    'site_code': sites[idx],
                    'quotation_date': dates[idx - 1] + timedelta(days=offset),
                    'competitor_name': competitor,
                    'source_id': source_id,
                    'price': prices[idx - 1],
                    'lifetime': lifetime - offset,
                })

        # After the last observation: keep repeating it until the lifetime reaches 0.
        for offset in range(1, lifetime + 1):
            filled.append({
                'site_code': sites[-1],
                'quotation_date': dates[-1] + timedelta(days=offset),
                'competitor_name': competitor,
                'source_id': source_id,
                'price': prices[-1],
                'lifetime': lifetime - offset,
            })
    return filled


# Only rows with the full lifetime are real observations. Filtering on that makes
# the cell safe to re-run: rows generated by a previous run are rebuilt, not
# treated as new observations.
observed = (
    df_zkkkkk
    .loc[df_zkkkkk['lifetime'] == PRICE_LIFETIME_DAYS]
    .sort_values(by='quotation_date')
)

new_rows = _carry_forward_rows(observed, PRICE_LIFETIME_DAYS)

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
if new_rows:
    df_zkkkkk = pd.concat([observed, pd.DataFrame(new_rows)], ignore_index=True)
else:
    df_zkkkkk = observed.reset_index(drop=True)

# Sort the final dataframe
df_zkkkkk = df_zkkkkk.sort_values(by=['competitor_name', 'source_id', 'quotation_date'])

# NOTE(review): this flag is True for rows that still have the full lifetime, i.e.
# the observed quotes, not the carried-forward copies. The column name suggests
# the opposite; confirm the intended meaning before relying on it.
df_zkkkkk['replica'] = df_zkkkkk['lifetime'] == PRICE_LIFETIME_DAYS

In [5]:
# Independent copy of the gap-filled quotes with `quotation_date` parsed to datetime64.
df_bench = df_zkkkkk.assign(
    quotation_date=lambda frame: pd.to_datetime(frame['quotation_date'])
)

In [6]:
# Row count per site in the benchmark frame.
df_bench['site_code'].value_counts()

SPO    1165577
Name: site_code, dtype: int64

In [7]:
# SKU ids to request, rendered explicitly as a comma-separated integer list.
# Formatting `tuple(ids)` straight into the SQL breaks for a single id ("(1,)"),
# for no ids ("()"), and whenever the scalar repr is not a bare number.
benchmark_ids = sorted({int(source_id) for source_id in df_bench['source_id'].unique()})
if not benchmark_ids:
    raise ValueError('df_bench has no source_id values; the tool-price query needs at least one SKU')

# Our own prices per day and parent SKU for region SPO over the last 300 days:
#   calendar - one row per day of the window (recursive CTE);
#   info     - minimum tax price and sale price per change date and parent SKU;
#   done     - calendar x SKU grid where LAG ... IGNORE NULLS picks the most
#              recent earlier non-null price of each SKU.
# Only rows that have a net price, from 2024-05-01 onwards, are returned.
query = """
WITH RECURSIVE calendar(calendar_date) AS (
  SELECT DATE_TRUNC('day', DATE(GETDATE()) - INTERVAL '300 day')
  UNION ALL
  SELECT calendar_date + INTERVAL '1 day'
  FROM calendar
  WHERE calendar_date BETWEEN DATE_TRUNC('day', DATE(GETDATE()) - INTERVAL '300 day') AND DATE(GETDATE() - 1) 
),

info AS (
SELECT
    DATE(coalesce(prices.last_modified_at, prices.created_at)) as created_at,
    pp.frida_id as source_id,
    MIN(coalesce(tiers.tax_price, prices.tax_price)) as price,
    MIN(coalesce(tiers.sale_price, prices.sale_price)) as net_price

FROM postgres_growth."growth_pricing.prices_history" prices
LEFT JOIN postgres_growth."growth_pricing.price_tiers_history" tiers ON prices.id = tiers.price_history_id
LEFT JOIN postgres_growth."growth_pricing.skus" skus ON prices.sku_id = skus.id
LEFT JOIN postgres_main_co."purchase_orders.products" p ON skus.sku_id = p.frida_id
LEFT JOIN postgres_main_co."purchase_orders.products" pp ON COALESCE(p.parent_id, p.id) = pp.id

WHERE DATE(prices.created_at) >= DATE_TRUNC('day', DATE(GETDATE()) - INTERVAL '300 day')
 AND p.region_code IN ('SPO')
 AND p.deleted_at IS NULL
 AND prices.created_by NOT ILIKE '%CATALOG%'
 AND pp.product_category_id IN (5,6,7,8,9,10,13,18) -- 1 ES FRUVER
 AND pp.frida_id IN ({skus})
GROUP BY 1,2--,3,4
),

done AS (
SELECT
  DATE(c.calendar_date) AS quotation_date,
  --s.region,
  --s.parent_product_name,
  (s.source_id)::int as source_id,
  LAG(i.price IGNORE NULLS) OVER (PARTITION BY s.source_id ORDER BY c.calendar_date)::FLOAT AS p_price_tool,
  LAG(i.net_price IGNORE NULLS) OVER (PARTITION BY s.source_id ORDER BY c.calendar_date)::FLOAT AS net_price_tool


FROM calendar c
CROSS JOIN (SELECT DISTINCT source_id FROM info) s
LEFT JOIN info i ON c.calendar_date = i.created_at-1 AND s.source_id = i.source_id
)

SELECT *
FROM done
WHERE net_price_tool IS NOT NULL
 AND quotation_date >= '2024-05-01'
""".format(skus=', '.join(str(source_id) for source_id in benchmark_ids))

df = run_read_prod_query(query)
# Parse to datetime64 so the column can be merged with df_bench.quotation_date.
df['quotation_date'] = pd.to_datetime(df['quotation_date'])

In [8]:
# Attach our own tool prices to every competitor quote of the same SKU and day.
# This cell overwrites `df` with its own output, so only the four columns that
# the tool-price query returns are taken from it and exact duplicates are
# dropped. On the first run that changes nothing; on a re-run it rebuilds the
# same frame instead of duplicating rows and creating `_x` / `_y` columns.
tool_price_cols = ['source_id', 'quotation_date', 'p_price_tool', 'net_price_tool']
df = pd.merge(
    df_bench,
    df[tool_price_cols].drop_duplicates(),
    on=['source_id', 'quotation_date'],
    how='inner',
)

In [9]:
# Sales for site SPO from 2024-05-01 onwards, restricted to the 'Multicategoría'
# super category: per site, day and parent SKU the query returns distinct
# customers, units (`cant`), GMV converted with a fixed divisor of 4.75
# (`gmv_usd`), quantity-weighted prices, GMV share and average cost (`costo`).
# Groups whose cost is not positive are dropped (HAVING costo > 0).
#
# NOTE(review): the `clients` CTE is grouped by site, week AND day, but it is
# joined back on site and week only, and `clt.customers` is part of the outer
# GROUP BY. A SKU-day can therefore come back once per distinct daily customer
# count of its week - confirm whether a weekly or a daily client count is meant.
query = """
WITH clients AS (
SELECT
    s.identifier_value AS site_code,
    TO_CHAR(DATE(fs.order_submitted_date),'YYYY-WW') AS week,
    DATE(fs.order_submitted_date) AS quotation_date,
    COUNT(DISTINCT fs.dim_customer)::FLOAT AS customers

FROM dpr_sales.fact_sales                   fs
INNER JOIN dpr_shared.dim_site              s   ON s.site_id = fs.dim_site
INNER JOIN dpr_shared.dim_product           dp  ON dp.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_category          cat ON cat.category_id = dp.category_id

WHERE 
    fs.gmv_enabled = TRUE
    AND fulfillment_order_status NOT IN ('CANCELLED', 'ARCHIVED','No value')
    AND fs.fb_order_status_id IN (1,6,7,8)
    AND fs.is_deleted = FALSE
    AND cat.parent_description = 'Bebidas'
    AND fs.dim_status = 1
    AND dp.is_slot = 'false'
    AND fs.gmv_pxq_local > 0
    AND s.identifier_value IN ('SPO')
    AND DATE(fs.order_submitted_date) >= '2024-05-01'
GROUP BY 1,2,3
)


SELECT
    s.identifier_value AS site_code,
    DATE(fs.order_submitted_date) AS quotation_date,
    TO_CHAR(DATE(fs.order_submitted_date),'YYYY-WW') AS week,
    cat.parent_description AS cat,
    sup.source_id,
    sup.description,
    clt.customers as clients,
    COUNT(DISTINCT fs.dim_customer)::FLOAT AS customers,
    SUM(fs.product_quantity_x_step_unit)::FLOAT AS cant,
    (SUM(fs.gmv_pxq_local)/4.75)::FLOAT AS gmv_usd,
    SUM(fs.product_price*fs.product_quantity_x_step_unit)/cant AS product_price,
    SUM(fs.product_price_discount*fs.product_quantity_x_step_unit)/cant AS product_price_discount,
    gmv_usd/SUM(gmv_usd) OVER (PARTITION BY s.identifier_value)::FLOAT AS gmv_mix,
    AVG(CASE WHEN dtd.dtd_cost_local = 0 THEN m.inventory_p_fin ELSE dtd.dtd_cost_local END)::float as costo

FROM dpr_sales.fact_sales                   fs
INNER JOIN dpr_shared.dim_site              s   ON s.site_id = fs.dim_site
INNER JOIN dpr_shared.dim_product           dp  ON dp.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_category          cat ON cat.category_id = dp.category_id
INNER JOIN dpr_shared.dim_stock_unit        su  ON su.product_id = fs.dim_product
INNER JOIN dpr_shared.dim_stock_unit        sup  ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id
LEFT JOIN dpr_cross_business.int_dtd_cost   dtd ON DATE(fs.order_submitted_date)-1 = to_date(dtd.dim_date_dtd::text,'YYYYMMDD')  and sup.stock_unit_id = dtd.dim_stock_unit
LEFT JOIN dpr_cross_business.fact_cross_business_insights m ON m.dim_stock_unit = sup.stock_unit_id AND m.dim_date = fs.dim_submitted_date
INNER JOIN clients                          clt ON clt.site_code = s.identifier_value and TO_CHAR(DATE(fs.order_submitted_date),'YYYY-WW') = clt.week
WHERE 
    fs.gmv_enabled = TRUE
    AND fulfillment_order_status NOT IN ('CANCELLED', 'ARCHIVED','No value')
    AND fs.fb_order_status_id IN (1,6,7,8)
    AND fs.is_deleted = FALSE
    AND cat.super_category = 'Multicategoría'
    AND fs.dim_status = 1
    AND dp.is_slot = 'false'
    AND fs.gmv_pxq_local > 0
    AND s.identifier_value IN ('SPO')
    AND DATE(fs.order_submitted_date) >= '2024-05-01'
    --AND DATE(fs.order_submitted_date) <= '2023-11-30'
GROUP BY 1,2,3,4,5,6,7
HAVING costo > 0
"""

# Load the sales and parse the date column to datetime64 for the later merges.
df_gmv = run_read_dwd_query(query).assign(
    quotation_date=lambda frame: pd.to_datetime(frame['quotation_date'])
)

In [10]:
# Cluster assignments per parent SKU for site ids 4, 6, 9 and 11. `rn` numbers the
# rows of each SKU from the most recently modified assignment downwards.
query = """
 SELECT 
        sup.source_id,
        type AS cluster,
        cat.parent_description AS cat,
        ROW_NUMBER() OVER (PARTITION BY sup.source_id ORDER BY c.last_modified_at DESC, cluster DESC) AS rn
    FROM 
        dpr_product_pricing.dim_sku_cluster_period c
     INNER JOIN dpr_shared.dim_stock_unit        su  ON su.sku = c.sku
     INNER JOIN dpr_shared.dim_stock_unit        sup  ON nvl(nullif(su.source_parent_id,0),su.source_id) = sup.source_id
     INNER JOIN dpr_shared.dim_category          cat  ON cat.category_id = sup.category_id
    where 
        c.site_id in (4,6,9,11)
"""

# Keep the first-ranked row of every SKU, and only the id and its cluster.
df_type = (
    run_read_dwd_query(query)
    .query('rn == 1')[['source_id', 'cluster']]
    .copy()
)

In [11]:
# Attach the cluster of every SKU to the sales frame; SKUs without a cluster
# assignment are labelled 'TAIL'.
# A `cluster` column left by a previous run of this cell is dropped first.
# Otherwise the second merge would create `cluster_x` / `cluster_y` and the
# `fillna` line would fail with a KeyError.
df_gmv = (
    df_gmv
    .drop(columns=['cluster'], errors='ignore')
    .merge(df_type, on='source_id', how='left')
)
df_gmv['cluster'] = df_gmv['cluster'].fillna('TAIL')

In [12]:
# SKUs that have at least one sales row in the 'Bebidas' category; both the
# benchmark frame and the sales frame are restricted to those SKUs.
bebidas_ids = df_gmv.loc[df_gmv['cat'] == 'Bebidas', 'source_id'].unique()

df = df.loc[df['source_id'].isin(bebidas_ids)].reset_index(drop=True)
df_gmv = df_gmv.loc[df_gmv['source_id'].isin(bebidas_ids)].reset_index(drop=True)

In [13]:
def _percentile(q):
    """Return a reducer that computes the ``q`` quantile of a Series."""
    return lambda values: values.quantile(q)


# Distribution of competitor prices per SKU and day (min, p10, p25, p50, p75, max)
# together with the mean of our own gross and net tool prices.
price_stats = [
    ('min', 'min'),
    ('p10', _percentile(0.10)),
    ('p25', _percentile(0.25)),
    ('p50', _percentile(0.50)),
    ('p75', _percentile(0.75)),
    ('max', 'max'),
]

grouped = df.groupby(['source_id', 'quotation_date'])
aggregated = grouped.agg({
    'price': price_stats,
    'p_price_tool': 'mean',
    'net_price_tool': 'mean',
}).reset_index()

# Flatten the two-level column index: ('price', 'p50') -> 'price_p50', while the
# key columns, whose second level is empty, keep their plain name.
aggregated.columns = [
    f'{top}_{sub}' if sub else top
    for top, sub in aggregated.columns
]

In [14]:
# df2: one sales row per SKU/day enriched with the competitor price distribution.
df2 = df_gmv.merge(aggregated, on=['source_id', 'quotation_date'], how='left')

# df3: one row per competitor quote enriched with the sales of that SKU/day/site.
df3 = df.merge(df_gmv, on=['source_id', 'quotation_date', 'site_code'], how='left')

In [15]:
from sklearn.linear_model import LinearRegression

# Express the date as a number (proleptic Gregorian ordinal) so it can be used
# as the regressor.
df2['date_ordinal'] = [stamp.toordinal() for stamp in df2['quotation_date']]

# Keep only rows whose GMV is a finite number (drops NaN and +/-inf).
df2_clean = df2.loc[np.isfinite(df2['gmv_usd'])]

# Slope of GMV against the quotation date for the rows of one source_id
def calculate_slope(group):
    """Return the slope of a least-squares line of ``gmv_usd`` on ``date_ordinal``.

    ``group`` is a DataFrame holding both columns; the result is the single
    coefficient of a ``LinearRegression`` fit, i.e. GMV change per day.
    """
    days = group['date_ordinal'].to_numpy().reshape(-1, 1)
    gmv = group['gmv_usd'].to_numpy()
    model = LinearRegression()
    model.fit(days, gmv)
    return model.coef_[0]

# One GMV-vs-date slope per source_id
slopes = df2_clean.groupby('source_id').apply(calculate_slope)
slopes_df = slopes.rename('slope').reset_index()

# The 15 source_ids whose GMV falls fastest (most negative slope first)
top_falling_ids = slopes_df.nsmallest(n=15, columns='slope').loc[:, 'source_id']

In [16]:
# Price indices of our gross / net price against each competitor quote, net
# margin over cost, and that margin applied to GMV.
df3['gpi'] = df3['p_price_tool'].div(df3['price'])
df3['npi'] = df3['net_price_tool'].div(df3['price'])
df3['nmg'] = 1 - df3['costo'].div(df3['net_price_tool'])
df3['ncash_mg'] = df3['gmv_usd'].mul(df3['nmg'])

In [17]:
# Same metrics at SKU/day level, indexed against the median competitor price.
df2['gpi'] = df2['p_price_tool_mean'].div(df2['price_p50'])
df2['npi'] = df2['net_price_tool_mean'].div(df2['price_p50'])
df2['nmg'] = 1 - df2['costo'].div(df2['net_price_tool_mean'])
df2['ncash_mg'] = df2['gmv_usd'].mul(df2['nmg'])

In [18]:
# Net margin from the discounted selling price actually charged, and that margin
# applied to GMV.
df_gmv['nmg'] = 1 - df_gmv['costo'].div(df_gmv['product_price_discount'])
df_gmv['ncash_mg'] = df_gmv['gmv_usd'].mul(df_gmv['nmg'])

In [31]:
import plotly.express as px

# Sorted list of the SKU ids present in df3 (candidate values for a selector)
source_id_options = sorted(df3['source_id'].unique())

# Plot 1: one line per `competitor_name`, x = `quotation_date`, y = `npi` / `gpi`
def create_plot1(source_id):
    """Build the NPI and GPI line charts of one SKU, one line per competitor.

    Every competitor series is laid out on the full daily calendar between the
    first and the last quotation date of the SKU. Days without a value stay NaN,
    so the line is interrupted there instead of being drawn straight across the
    gap. Dropping those NaN rows before plotting would undo the calendar
    reindexing and leave `connectgaps=False` with nothing to act on.

    Parameters
    ----------
    source_id
        SKU id to select from the global ``df3`` frame.

    Returns
    -------
    tuple
        ``(npi_figure, gpi_figure)`` as plotly figures.

    Raises
    ------
    ValueError
        If ``df3`` holds no usable row for ``source_id``.
    """
    df_filtered = df3[df3['source_id'] == source_id]

    # Rows without a date or a competitor cannot be placed on any line.
    df_filtered = df_filtered.dropna(subset=['quotation_date', 'competitor_name'])
    if df_filtered.empty:
        raise ValueError(f'No benchmark rows found for source_id {source_id!r}')

    # Full daily calendar covered by this SKU.
    all_dates = pd.date_range(start=df_filtered['quotation_date'].min(),
                              end=df_filtered['quotation_date'].max())

    # One value per competitor and day (first occurrence wins).
    series = (
        df_filtered
        .drop_duplicates(subset=['competitor_name', 'quotation_date'], keep='first')
        .set_index(['competitor_name', 'quotation_date'])[['npi', 'gpi']]
    )

    # Competitor x calendar grid; combinations without a quote become NaN rows
    # that still carry the competitor name, so each line keeps its own gaps.
    competitors = series.index.get_level_values('competitor_name').unique().sort_values()
    full_index = pd.MultiIndex.from_product(
        [competitors, all_dates], names=['competitor_name', 'quotation_date']
    )
    df_complete = series.reindex(full_index).reset_index()

    def _metric_figure(metric, label):
        """Line chart of ``metric`` over time, coloured by competitor."""
        fig = px.line(df_complete, x='quotation_date', y=metric, color='competitor_name',
                      title=f'{label} por Competitor Name',
                      labels={'quotation_date': 'Fecha', metric: label})
        # NaN days must show as breaks in the line.
        fig.update_traces(connectgaps=False)
        # Taller than the default so many competitor lines stay readable.
        fig.update_layout(height=600)
        return fig

    return _metric_figure('npi', 'NPI'), _metric_figure('gpi', 'GPI')


# Crear el gráfico 2: Barras para `cant` y `customers` en dos ejes Y diferentes
import pandas as pd

def create_plot2(source_id):
    """Build the daily and weekly quantity/customer charts of one SKU.

    Reads the global ``df_gmv`` frame. Each chart shows ``cant`` as bars and
    ``customers`` as a line on a secondary y axis; the weekly chart sums both
    columns per calendar week (labelled by the week's start time).

    Returns ``(daily_figure, weekly_figure)``.
    """
    daily = df_gmv.loc[df_gmv['source_id'] == source_id].sort_values(by='quotation_date')

    def _bars_with_clients(frame, x_col, title, x_label):
        """Bar chart of `cant` with `customers` overlaid on a right-hand axis."""
        fig = px.bar(frame, x=x_col, y='cant',
                     title=title, labels={x_col: x_label, 'cant': 'Cantidad'})
        fig.add_scatter(x=frame[x_col], y=frame['customers'],
                        mode='lines+markers', name='Clientes', yaxis='y2')
        fig.update_layout(yaxis2=dict(title='Clientes', overlaying='y', side='right'))
        return fig

    # Daily chart
    fig2 = _bars_with_clients(daily, 'quotation_date', 'Cantidad y Clientes', 'Fecha')

    # Weekly chart: replace `week` with the start of the weekly period, then sum
    week_start = daily['quotation_date'].dt.to_period('W').dt.start_time
    df_weekly = (
        daily.assign(week=week_start)
        .groupby('week')
        .agg({'cant': 'sum', 'customers': 'sum'})
        .reset_index()
    )
    fig3 = _bars_with_clients(df_weekly, 'week', 'Cantidad y Clientes por Semana', 'Semana')

    return fig2, fig3

# SKU under inspection
sku = 363767

# Build the four figures for that SKU
fig1, fig4 = create_plot1(sku)
fig2, fig3 = create_plot2(sku)

print(top_falling_ids)

# Show the figures: NPI, daily volume, weekly volume, GPI
for figure in (fig1, fig2, fig3, fig4):
    figure.show()

# Analyst notes per SKU. The bare integer literals below are no-op expressions;
# the value of the last one is what the cell displays as its output.
452043 # this one is serious, index 90
363763 # I don't understand why this one fell; it is already at 5% margin
277866 # this one had been doing badly for a while
363789 # falling since Jun 16; not because of pricing
363775 # look at the drop; check with Sandra
614937 # what if I lower its price?

# what I see is that all beers fall at 5% margin; the move would be to go to 2% margin

580894 # very good example


49     452043
149    652815
12     277871
13     277873
33     363763
10     277866
37     363789
35     363775
74     597183
72     580894
56     483012
46     397323
80     614937
53     453298
28     284046
Name: source_id, dtype: int64


580894

In [20]:
# GMV-weighted average of a metric column, used as the custom aggregation below
def custom_ventas(group, column):
    """Return the GMV-weighted mean of ``column``, multiplied by 100.

    Rows where the metric or the GMV is missing are left out of BOTH the
    numerator and the denominator. Keeping their GMV in the denominator would
    count a missing metric as zero and pull the index down.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a ``gmv_usd`` column (the weights) and the metric column.
    column : str
        Name of the metric column to average.

    Returns
    -------
    float
        ``100 * sum(metric * gmv) / sum(gmv)`` over the usable rows, or NaN when
        no row is usable or the usable GMV adds up to zero.
    """
    values = group[column]
    weights = group['gmv_usd']
    usable = values.notna() & weights.notna()
    total_weight = weights[usable].sum()
    if total_weight == 0:
        return float('nan')
    return (100.00 * values[usable] * weights[usable]).sum() / total_weight


def _weekly_category_summary(group):
    """Summarise the rows of one (cat, week) group into a single record."""
    return pd.Series({
        'gmv_mix': 100.00 * group['gmv_mix'].sum(),  # share of GMV, in percent
        'clients': group['clients'].mean(),
        'gmv_usd': group['gmv_usd'].sum(),
        'ncash_mg': group['ncash_mg'].sum(),
        'cant': group['cant'].sum(),
        'npi': custom_ventas(group, 'npi'),
        'gpi': custom_ventas(group, 'gpi'),
    })


# Weekly summary per category. A daily variant grouped by ['cat', 'quotation_date']
# used to be kept here as commented-out code.
df_final2 = (
    df2.groupby(['cat', 'week'])
    .apply(_weekly_category_summary)
    .reset_index()
)

In [21]:
# Print the column dtypes and non-null counts of df2 (sales rows merged with the
# per-SKU/day competitor price aggregates).
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 89680 entries, 0 to 89679
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   site_code               89680 non-null  object        
 1   quotation_date          89680 non-null  datetime64[ns]
 2   week                    89680 non-null  object        
 3   cat                     89680 non-null  object        
 4   source_id               89680 non-null  int64         
 5   description             89680 non-null  object        
 6   clients                 89680 non-null  float64       
 7   customers               89680 non-null  float64       
 8   cant                    89680 non-null  float64       
 9   gmv_usd                 89680 non-null  float64       
 10  product_price           89680 non-null  float64       
 11  product_price_discount  89680 non-null  float64       
 12  gmv_mix                 89680 non-null  float6

In [22]:
import plotly.express as px

# Sorted list of the categories present in df_final2 (candidate values for a selector)
cat_options = sorted(df_final2['cat'].unique())

# Plot 1: weekly `npi` and `gpi` lines of one category
def create_plot1(cat):
    """Build the weekly NPI and GPI line charts of one category.

    Reads the global ``df_final2`` frame and returns ``(npi_figure, gpi_figure)``.

    NOTE(review): this definition replaces the SKU-level ``create_plot1`` defined
    in an earlier cell. After this cell has run, the earlier cell's call with a
    SKU id reaches this function unless that cell is executed again.
    """
    df_filtered = df_final2.loc[df_final2['cat'] == cat]

    def _weekly_line(metric, label):
        """Line chart of ``metric`` per week."""
        fig = px.line(df_filtered, x='week', y=metric,
                      title=f'{label} por Competitor Name',
                      labels={'week': 'Fecha', metric: label})
        # Missing values must not be bridged by the line.
        fig.update_traces(connectgaps=False)
        # Taller than the default figure.
        fig.update_layout(height=600)
        return fig

    return _weekly_line('npi', 'NPI'), _weekly_line('gpi', 'GPI')


# Crear el gráfico 2: Barras para `gmv_usd` y `clients` en dos ejes Y diferentes
import pandas as pd

def create_plot2(cat):
    """Build two weekly GMV/client charts of one category.

    Reads the global ``df_final2`` frame. Both figures plot the same weekly
    data (``gmv_usd`` as bars, ``clients`` as a line on a secondary y axis) and
    differ only in their title and x-axis label.

    Returns ``(fig2, fig3)``.

    NOTE(review): this definition replaces the SKU-level ``create_plot2`` defined
    in an earlier cell.
    """
    weekly = df_final2.loc[df_final2['cat'] == cat].sort_values(by='week')

    def _bars_with_clients(title, x_label):
        """Bar chart of `gmv_usd` with `clients` overlaid on a right-hand axis."""
        fig = px.bar(weekly, x='week', y='gmv_usd',
                     title=title, labels={'week': x_label, 'gmv_usd': 'gmv_usdidad'})
        fig.add_scatter(x=weekly['week'], y=weekly['clients'],
                        mode='lines+markers', name='Clientes', yaxis='y2')
        fig.update_layout(yaxis2=dict(title='Clientes', overlaying='y', side='right'))
        return fig

    fig2 = _bars_with_clients('gmv_usdidad y Clientes', 'Fecha')
    fig3 = _bars_with_clients('gmv_usdidad y Clientes por Semana', 'Semana')

    return fig2, fig3

# Category under inspection (the variable keeps the name `sku` used by the earlier cell)
sku = 'Bebidas'

# Build the four figures for that category
fig1, fig4 = create_plot1(sku)
fig2, fig3 = create_plot2(sku)

print(top_falling_ids)

# Show the figures: NPI, GMV, GMV (weekly title), GPI
for figure in (fig1, fig2, fig3, fig4):
    figure.show()

# Analyst notes per SKU. The bare integer literals below are no-op expressions;
# the value of the last one is what the cell displays as its output.
452043 # this one is serious, index 90
363763 # I don't understand why this one fell; it is already at 5% margin
277866 # this one had been doing badly for a while
363789 # falling since Jun 16; not because of pricing
363775 # look at the drop; check with Sandra
614937 # what if I lower its price?

# what I see is that all beers fall at 5% margin; the move would be to go to 2% margin

580894 # very good example


49     452043
149    652815
12     277871
13     277873
33     363763
10     277866
37     363789
35     363775
74     597183
72     580894
56     483012
46     397323
80     614937
53     453298
28     284046
Name: source_id, dtype: int64


580894

In [23]:
#df3.to_excel("info.xlsx")