In [2]:
#//LIBRARIES
import math
import numpy as np
import pandas as pd

import sys
import os 
sys.path.append(os.path.expanduser('~'))

from analysts_tools.growth import *

#Procurement tools
from procurement_lib import send_slack_notification,GoogleSheet,redash
from analystcommunity.read_connection_data_warehouse import run_read_dwd_query

from datetime import datetime, timedelta

In [3]:
# Informacion descriptiva de los productos prendidos en PAGINA
query = """
select
    competitor.competitor_name AS competitor_name,
    CASE
        WHEN (competitor.competitor_name ILIKE '%atacadao%' OR competitor.competitor_name ILIKE '%atacadão%') THEN 'Atacadao'
        WHEN competitor.competitor_name ILIKE '%assaí%' THEN 'Assai'
        ELSE 'ALL'
    END AS competitor_group,
    quotation_date.full_date AS quotation_date,
    su.source_id,
    ROUND(cpp.product_selected_price,2)::float as price--ROUND(MEDIAN(cpp.product_selected_price),2)::float as price
from dpr_product_pricing.fact_collected_product_prices cpp
    inner join dpr_shared.dim_date quotation_date
        on cpp.dim_quotation_date = quotation_date.date_id
    inner join dpr_shared.dim_time quotation_time
        on cpp.dim_quotation_time = quotation_time.time_id
    inner join dpr_shared.dim_site site
        on cpp.dim_site = site.site_id
    inner join dpr_shared.dim_category cat
        on cpp.dim_category = cat.category_id
    inner join dpr_product_pricing.dim_product_outlier_type outlier_type
        on cpp.dim_outlier_type = outlier_type.outlier_type_id
    inner join dpr_product_pricing.dim_product_source_type source_type
        on cpp.dim_source_type = source_type.source_type_id
    inner join dpr_product_pricing.dim_product_competitor competitor
        on cpp.dim_competitor = competitor.competitor_id
    inner join dpr_product_pricing.dim_product_competitor_type competitor_type
        on(
            case
                when cpp.super_category = 'Fruver'
                    then competitor.product_competitor_type_id_fruver = competitor_type.competitor_type_id
                when cpp.super_category = 'Multicategoría'
                    then competitor.product_competitor_type_id_multicategoria = competitor_type.competitor_type_id
            end
        )
    inner join dpr_shared.dim_stock_unit su
        on cpp.dim_stock_unit = su.stock_unit_id
where ((quotation_date.full_date between '2024-09-16' and '2024-09-29') or (quotation_date.full_date between '2024-10-07' and '2024-10-30'))
    AND source_type.description IN ('Infoprice')
    AND site.identifier_value IN ('SPO')

--GROUP BY 1,2,3,4
"""
df = run_read_dwd_query(query)

In [4]:
# Solo Kvis
df=df.loc[df.source_id.isin([384239,153089,572871,296389,588033,105608,93574,-314938,-314938,105599,356893,616289,-314887,-314887,277873,654639,654639,72497,73947,-314629,-314629,-314737,-314737,569233,646447,277874,-314770,-314770,277871,235489,452043,277866,483012,632851,165988,452047,75297,363767,597183,363775,168290,74194,396273,173891,284509,363763,633637,363754,597691,453302,278348,-314797,-314797,-314797,397295,511038,501933,160747,107178,202839,633619,307990,284513,458143,108497,592711,512674,141889,165672,647187,636349,397323,261188,221189,240307,426339,586807,631911,382175,511889,450359,643315,284517,417765,314595,284505,417745,107187,307988,160741,165678,141925,418958,510797,630359,217667,633625,240304,314589,518364,161239,240505,638099,108290,314592,418946,577339,366139,648741,613405,240413
])].reset_index(drop=True).copy()

# Convert quotation_date to datetime
df['quotation_date'] = pd.to_datetime(df['quotation_date'])

# Extract the weekday name
df['weekday'] = df['quotation_date'].dt.day_name()

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12736 entries, 0 to 12735
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   competitor_name   12736 non-null  object        
 1   competitor_group  12736 non-null  object        
 2   quotation_date    12736 non-null  datetime64[ns]
 3   source_id         12736 non-null  int64         
 4   price             12736 non-null  float64       
 5   weekday           12736 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(3)
memory usage: 597.1+ KB


In [6]:
# Paso 1: Calcular la mediana diaria de precios por 'source_id' y 'quotation_date'
df_daily_median = df.groupby(['source_id', 'quotation_date'])['price'].median().reset_index()
df_daily_median.rename(columns={'price': 'daily_median_price'}, inplace=True)

# Paso 2: Ordenar el DataFrame por 'source_id' y 'quotation_date' para asegurar el cálculo secuencial
df_daily_median = df_daily_median.sort_values(by=['source_id', 'quotation_date'])

# Paso 3: Calcular la varianza móvil de estas medianas diarias con una ventana de 3 días
df_daily_median['rolling_variance_3day'] = df_daily_median.groupby('source_id')['daily_median_price'].rolling(window=3).var().reset_index(level=0, drop=True)
df_daily_median['rolling_variance_2day'] = df_daily_median.groupby('source_id')['daily_median_price'].rolling(window=2).var().reset_index(level=0, drop=True)

In [None]:
import plotly.express as px

# Crear el gráfico con plotly express
fig = px.line(df_daily_median.loc[df_daily_median.source_id == 572871], x='quotation_date', y='rolling_variance_3day', color='source_id', 
              title="3-Day Rolling Variance of Daily Median Prices by Source ID",
              labels={'rolling_variance_2day': '2-Day Rolling Variance', 'quotation_date': 'Date', 'source_id': 'Source ID'})
fig.show()

fig = px.line(df_daily_median.loc[df_daily_median.source_id == 572871], x='quotation_date', y='rolling_variance_2day', color='source_id', 
              title="2-Day Rolling Variance of Daily Median Prices by Source ID",
              labels={'rolling_variance_2day': '2-Day Rolling Variance', 'quotation_date': 'Date', 'source_id': 'Source ID'})
# Mostrar el gráfico
fig.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['quotation_date', 'source_id', 'competitor_group', 'weekday', 'daily_median_price', 'next_day_median', 'daily_median_variance'] but received: rolling_variance_3day

In [8]:
import pandas as pd
import plotly.express as px

# Ejemplo de cómo cargar los datos en un DataFrame si no están cargados
# df = pd.read_csv("ruta_a_tu_archivo.csv") 

# 1. Calcular la varianza diaria de precios por source_id
df_daily_variance = df.groupby(['quotation_date', 'source_id', 'competitor_group', 'weekday']).agg(
    daily_price_variance=('price', 'var')
).reset_index()

# 2. Boxplot de la varianza diaria
fig_var = px.box(df_daily_variance, y='daily_price_variance', 
                 title="Daily Price Variance",
                 labels={'daily_price_variance': 'Variance of Daily Prices'})
fig_var.show()

# 3. Boxplot de la varianza diaria por día de la semana
fig_weekday = px.box(df_daily_variance, x='weekday', y='daily_price_variance', 
                     title="Daily Price Variance by Weekday",
                     labels={'daily_price_variance': 'Variance of Daily Prices', 'weekday': 'Day of the Week'})
fig_weekday.show()

# 4. Boxplot de la varianza diaria por grupo de competidor
fig_competitor_group = px.box(df_daily_variance, x='competitor_group', y='daily_price_variance', 
                              title="Daily Price Variance by Competitor Group",
                              labels={'daily_price_variance': 'Variance of Daily Prices', 'competitor_group': 'Competitor Group'})
fig_competitor_group.show()


In [9]:
import pandas as pd
import plotly.express as px

# 1. Calcular la mediana diaria de precios por quotation_date y source_id
df_daily_median = df.groupby(['quotation_date', 'source_id', 'competitor_group', 'weekday']).agg(
    daily_median_price=('price', 'median')
).reset_index()

# 2. Calcular la variación diaria de la mediana de precios por source_id
df_daily_median['next_day_median'] = df_daily_median.groupby('source_id')['daily_median_price'].shift(-1)
df_daily_median['daily_median_variance'] = (df_daily_median['daily_median_price'] - df_daily_median['next_day_median']).abs()

# Filtrar para eliminar valores NaN que se generan en el último día sin comparación al siguiente día
df_daily_median = df_daily_median.dropna(subset=['daily_median_variance'])

# 3. Crear un boxplot para la variación diaria de la mediana
fig_variance_median = px.box(df_daily_median, y='daily_median_variance', 
                             title="Daily Median Price Variance (Today vs. Next Day)",
                             labels={'daily_median_variance': 'Variance of Median Prices (Today vs. Next Day)'})
fig_variance_median.show()

# 4. Boxplot para la variación diaria de la mediana por día de la semana
fig_weekday_variance = px.box(df_daily_median, x='weekday', y='daily_median_variance', 
                              title="Daily Median Price Variance by Weekday",
                              labels={'daily_median_variance': 'Variance of Median Prices (Today vs. Next Day)', 'weekday': 'Day of the Week'})
fig_weekday_variance.show()

# 5. Boxplot para la variación diaria de la mediana por grupo de competidor
fig_competitor_variance = px.box(df_daily_median, x='competitor_group', y='daily_median_variance', 
                                 title="Daily Median Price Variance by Competitor Group",
                                 labels={'daily_median_variance': 'Variance of Median Prices (Today vs. Next Day)', 'competitor_group': 'Competitor Group'})
fig_competitor_variance.show()


In [14]:
import pandas as pd
import plotly.express as px

# 1. Calcular la mediana diaria de precios por quotation_date y source_id
df_daily_median = df.groupby(['quotation_date', 'source_id', 'competitor_group', 'weekday']).agg(
    daily_median_price=('price', 'median')
).reset_index()

# 2. Calcular la variación diaria de la mediana de precios por source_id
df_daily_median['next_day_median'] = df_daily_median.groupby('source_id')['daily_median_price'].shift(-1)
df_daily_median['daily_median_variance'] = (df_daily_median['daily_median_price'] - df_daily_median['next_day_median']).abs()

# Filtrar para eliminar valores NaN que se generan en el último día sin comparación al siguiente día
df_daily_median = df_daily_median.dropna(subset=['daily_median_variance'])

# 3. Crear un boxplot que muestre los días de la semana y el grupo de competidores para cada día
fig_weekday_competitor = px.box(df_daily_median.loc[df_daily_median.source_id == 572871], x='weekday', y='daily_median_variance', color='competitor_group',
                                title="Daily Median Price Variance by Weekday and Competitor Group",
                                labels={'daily_median_variance': 'Variance of Median Prices (Today vs. Next Day)',
                                        'weekday': 'Day of the Week',
                                        'competitor_group': 'Competitor Group'})
fig_weekday_competitor.update_layout(boxmode='group')  # Agrupar los boxplots por día de la semana
fig_weekday_competitor.show()


In [11]:
# 3. Crear un boxplot que muestre los días de la semana y el grupo de competidores para cada día
fig_weekday_competitor = px.box(df_daily_median, x='weekday', y='daily_median_variance', #color='competitor_group',
                                title="Daily Median Price Variance by Weekday and Competitor Group",
                                labels={'daily_median_variance': 'Variance of Median Prices (Today vs. Next Day)',
                                        'weekday': 'Day of the Week',
                                        'competitor_group': 'Competitor Group'})
fig_weekday_competitor.update_layout(boxmode='group')  # Agrupar los boxplots por día de la semana
fig_weekday_competitor.show()