In [1]:
#//LIBRARIES
import math
import numpy as np
import pandas as pd

import sys
import os 
sys.path.append(os.path.expanduser('~'))

from analysts_tools.growth import *

#Procurement tools
from procurement_lib import send_slack_notification,GoogleSheet,redash
from analystcommunity.read_connection_data_warehouse import run_read_dwd_query

from datetime import datetime, timedelta

In [2]:
# Load the product lookup table from Google Sheets into `df_join`.
# Later cells read its `ean`, `source_id` and `nombre` columns.
# NOTE(review): the constructor argument looks like a spreadsheet key and '60'
# like a worksheet name/id — confirm against procurement_lib.
df_sheet=GoogleSheet("1mkqot1agSekg1czi6PU8Qv5vS8niK6MQTnSHaBRIqPU")
df_join = df_sheet.get_as_dataframe('60')

In [3]:
# Read the InfoPrice panel export (local Excel file) and keep only the rows whose
# 'Identificador Produto' matches an `ean` in the lookup sheet (inner merge).
# NOTE(review): both merge keys must share a dtype for rows to match — confirm
# that the Excel identifier and the sheet's `ean` are both read as the same type.
df_bench_info = pd.read_excel("Dados Painel InfoPrice-1731448815934.xlsx")
df_bench_info1 = df_bench_info.merge(df_join, left_on=['Identificador Produto'],right_on='ean',how='inner')
# Parse the day/month/year dates and store them back as 'YYYY-MM-DD' strings.
df_bench_info1['quotation_date'] = pd.to_datetime(df_bench_info1['quotation_date'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')

In [4]:
# Competitor price quotes from the data warehouse ("bench fru"); this becomes the
# 'Frubana' side of the comparison plotted later in the notebook.
#
# The query keeps rows where:
#   * the site identifier is 'SPO',
#   * the source type is Zukkin, Scrapers or Infoprice,
#   * the competitor type is 'Main_Competitor',
#   * the competitor name does not contain 'cayena' and is not 'Atacadao_V2',
#   * the stock unit has source_parent_id = 0 and its source_id is in the fixed list.
#
# NOTE(review): the date bound is written as `'2024-11-12'-30`, i.e. an integer
# subtracted from an untyped string literal. Presumably it means "the 30 days
# before 2024-11-12", but that relies on the warehouse's implicit casting —
# consider `DATE '2024-11-12' - 30` and confirm.
# NOTE(review): the joins to dim_time and dim_product_outlier_type are not used
# in SELECT or WHERE; as inner joins they only drop rows without a match.
# NOTE(review): 284509 and 105608 appear twice in the source_id list (harmless).
query = """
select
    cpp.collected_product_prices_id,
    quotation_date.full_date AS quotation_date,
    competitor.competitor_name,
    source_type.description as source_type,
    cat.parent_description as category,
    su.source_id,
    su.description,
    cpp.product_selected_price::float as price
from dpr_product_pricing.fact_collected_product_prices cpp
    inner join dpr_shared.dim_date quotation_date
        on cpp.dim_quotation_date = quotation_date.date_id
    inner join dpr_shared.dim_time quotation_time
        on cpp.dim_quotation_time = quotation_time.time_id
    inner join dpr_shared.dim_site site
        on cpp.dim_site = site.site_id
    inner join dpr_shared.dim_category cat
        on cpp.dim_category = cat.category_id
    inner join dpr_product_pricing.dim_product_outlier_type outlier_type
        on cpp.dim_outlier_type = outlier_type.outlier_type_id
    inner join dpr_product_pricing.dim_product_source_type source_type
        on cpp.dim_source_type = source_type.source_type_id
    inner join dpr_product_pricing.dim_product_competitor competitor
        on cpp.dim_competitor = competitor.competitor_id
   inner join dpr_product_pricing.dim_product_competitor_type competitor_type 
        on cpp.dim_product_competitor_type = competitor_type.competitor_type_id
    inner join dpr_shared.dim_stock_unit su
        on cpp.dim_stock_unit = su.stock_unit_id
where quotation_date.full_date > '2024-11-12'-30
    and site.identifier_value = 'SPO'
    AND source_type.description IN ('Zukkin','Scrapers','Infoprice')
    AND competitor_type.description='Main_Competitor'
    AND (
        competitor.competitor_name NOT ILIKE '%cayena%'
        AND competitor.competitor_name <> 'Atacadao_V2'
    )
    and su.source_parent_id = 0
    and su.source_id in (483012,105608,633637,653005,363775,73456,363754,363767,277873,452047,452043,277874,165988,284509,278348,153089,72488,154464,173891,173909,572871,648293,648297,296389,235489,72497,72329,278407,168290,284509,105608,632851,73557,124907,124405,93574,384239,277866,597691,356893,588033,597183,105599)
"""
# Run the query; the result is later passed to get_info, which reads its
# quotation_date, competitor_name, source_id and price columns.
df_bench_fru = run_read_dwd_query(query)

In [None]:
# Function to calculate the required statistics
def calculate_statistics(df):
    """Summarise prices per quotation date and product.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``quotation_date``, ``source_id``, ``price``
        and ``competitor_name``.

    Returns
    -------
    pandas.DataFrame
        One row per ``(quotation_date, source_id)`` with the columns, in order:
        ``quotation_date``, ``source_id``, ``num_data_points`` (non-null
        prices), ``num_competitors`` (distinct competitor names),
        ``min_price``, ``avg_price``, ``median_price`` and ``max_price``.
    """
    grouped = df.groupby(['quotation_date', 'source_id'])
    # Aggregate each column directly instead of looking competitor names up via
    # ``df.loc[x.index, ...]``: the index-based lookup pulls in rows from other
    # groups whenever the frame's index contains duplicate labels.
    return grouped.agg(
        num_data_points=('price', 'count'),
        num_competitors=('competitor_name', 'nunique'),
        min_price=('price', 'min'),
        avg_price=('price', 'mean'),
        median_price=('price', 'median'),
        max_price=('price', 'max'),
    ).reset_index()

In [6]:
def _carry_forward_rows(dates, prices, competitor, source_id, max_lifetime):
    """Build the synthetic rows that carry observed prices forward in time.

    ``dates`` and ``prices`` are the chronologically ordered observations of a
    single (competitor, source_id) pair. Each gap between two observations is
    filled with the earlier price for at most ``max_lifetime`` days, and the
    last observation is extended by ``max_lifetime`` days. Every generated row
    records the remaining lifetime (``max_lifetime`` minus the days elapsed).
    """
    rows = []

    def _row(base_date, price, offset):
        return {
            'quotation_date': base_date + timedelta(days=offset),
            'competitor_name': competitor,
            'source_id': source_id,
            'price': price,
            'lifetime': max_lifetime - offset,
        }

    # Fill the days strictly between consecutive observations.
    for prev_date, prev_price, next_date in zip(dates, prices, dates[1:]):
        gap_days = (next_date - prev_date).days
        # The explicit comparison also skips gaps that are not a number
        # (unparseable dates), which would otherwise break range().
        if gap_days > 1:
            for offset in range(1, min(gap_days, max_lifetime + 1)):
                rows.append(_row(prev_date, prev_price, offset))

    # Extend the last known price until its lifetime runs out.
    for offset in range(1, max_lifetime + 1):
        rows.append(_row(dates[-1], prices[-1], offset))

    return rows


def _labelled_statistics(df, label):
    """Run calculate_statistics on ``df`` and rename its columns for ``label``."""
    stats = calculate_statistics(df)
    suffix = label.lower()
    stats.columns = [
        'quotation_date', 'source_id',
        f'POINTS Med {label}', f'num_competitors_{suffix}',
        f'min_price_{suffix}', f'avg_price_{suffix}',
        f'Med {label}', f'max_price_{suffix}',
    ]
    return stats


def get_info(df_zkkkkk, max_lifetime=8):
    """Carry competitor prices forward over missing days and aggregate them.

    Parameters
    ----------
    df_zkkkkk : pandas.DataFrame
        Price observations with at least the columns ``quotation_date``,
        ``competitor_name``, ``source_id`` and ``price``. The input frame is
        not modified.
    max_lifetime : int, default 8
        Number of days an observed price is carried forward when no newer
        observation exists for the same competitor and product.

    Returns
    -------
    tuple
        ``(bench_df, filled, df_assai)`` where

        * ``bench_df`` has one row per (quotation_date, source_id) with the
          statistics of all competitors, left-joined with the statistics of
          the Assaí and Atacadão competitors;
        * ``filled`` holds the observed plus generated rows, restricted to
          ``quotation_date``, ``competitor_name``, ``source_id``, ``price``
          and ``lifetime`` (``lifetime`` is only set on generated rows unless
          the input already had that column);
        * ``df_assai`` is the subset of all rows whose competitor name
          contains "assai"/"assaí".

    Notes
    -----
    The last observation of every competitor/product pair is always extended
    by ``max_lifetime`` days, so the output can contain dates later than the
    newest observation. ``calculate_statistics`` must be defined before this
    function is called.
    """
    # Parse the dates first so the sort is chronological rather than an
    # ordering of raw strings/objects; unparseable values become NaT.
    observed = df_zkkkkk.assign(
        quotation_date=pd.to_datetime(df_zkkkkk['quotation_date'], errors='coerce')
    ).sort_values(by='quotation_date')

    # groupby keeps the (already chronological) row order inside each group.
    new_rows = []
    for (competitor, source_id), group in observed.groupby(['competitor_name', 'source_id']):
        new_rows.extend(
            _carry_forward_rows(
                group['quotation_date'].tolist(),
                group['price'].tolist(),
                competitor,
                source_id,
                max_lifetime,
            )
        )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    if new_rows:
        filled = pd.concat([observed, pd.DataFrame(new_rows)], ignore_index=True)
    else:
        filled = observed.reset_index(drop=True)
        if 'lifetime' not in filled.columns:
            # Keep the returned column set stable even when nothing was generated.
            filled['lifetime'] = float('nan')

    competitor_names = filled['competitor_name']

    # Statistics across every competitor.
    stats_all = _labelled_statistics(filled, 'ALL')

    # Competitors whose name contains "assai" / "assaí" (case-insensitive).
    df_assai = filled[competitor_names.str.contains(r'assai|assaí', case=False, na=False, regex=True)]
    stats_assai = _labelled_statistics(df_assai, 'Assai')

    # Competitors whose name starts with "atacadao" / "atacadão", excluding the
    # 'Atacadao_V2' scraper. na=False keeps missing names out of both masks.
    is_atacadao = competitor_names.str.match(r'(?i)^atacad[aã]o', na=False)
    is_scraper = competitor_names.str.contains(r'(?i)^Atacadao_V2$', na=False)
    df_atacadao = filled[is_atacadao & ~is_scraper]
    stats_atacadao = _labelled_statistics(df_atacadao, 'Atacadao')

    # Left joins keep every (date, product) row of the all-competitor statistics.
    keys = ['quotation_date', 'source_id']
    bench_df = (
        stats_all
        .merge(stats_assai, on=keys, how='left')
        .merge(stats_atacadao, on=keys, how='left')
    )

    return bench_df, filled[['quotation_date', 'competitor_name', 'source_id', 'price', 'lifetime']], df_assai

In [7]:
# Warehouse quotes -> (daily benchmark statistics, observed + carried-forward rows, Assaí-only rows).
bench_fru,check_fru,df_assai2 = get_info(df_bench_fru)

In [8]:
# Same pipeline for the InfoPrice panel rows that matched the lookup sheet.
bench_info,check_info,df_assai3 = get_info(df_bench_info1)

In [9]:
# Summary of the InfoPrice benchmark; the 'POINTS ...' columns are data-point counts per date and product.
bench_info.describe()

Unnamed: 0,source_id,POINTS Med ALL,num_competitors_all,min_price_all,avg_price_all,Med ALL,max_price_all,POINTS Med Assai,num_competitors_assai,min_price_assai,avg_price_assai,Med Assai,max_price_assai,POINTS Med Atacadao,num_competitors_atacadao,min_price_atacadao,avg_price_atacadao,Med Atacadao,max_price_atacadao
count,1516.0,1516.0,1516.0,1516.0,1516.0,1516.0,1516.0,1128.0,1128.0,1128.0,1128.0,1128.0,1128.0,1065.0,1065.0,1065.0,1065.0,1065.0,1065.0
mean,314698.087071,102.967018,102.770449,15.637302,19.163227,19.153684,23.62626,52.365248,52.305851,12.653963,14.681182,14.792199,16.513927,29.784038,29.742723,13.186845,15.563251,15.444582,17.428225
std,197400.292864,80.011709,79.895113,27.242198,30.478996,30.582563,37.782453,35.804328,35.716639,13.949975,17.43472,18.226766,21.050415,19.355264,19.335082,14.878435,18.044033,18.894394,22.619401
min,72329.0,1.0,1.0,0.75,0.9,0.85,0.9,1.0,1.0,1.19,1.591587,1.49,1.69,1.0,1.0,1.89,1.89,1.89,1.89
25%,153089.0,23.0,23.0,2.99,3.880048,3.85,4.99,15.75,15.75,3.65,3.840845,3.88,4.09,12.0,12.0,3.69,3.859714,3.95,4.05
50%,278348.0,96.5,96.5,5.55,6.948499,6.995,8.0,59.0,59.0,6.69,7.014007,7.08,7.25,33.0,33.0,6.69,6.89,6.89,7.15
75%,452047.0,172.25,172.0,17.9,24.028164,22.9,28.5425,70.0,70.0,18.3,20.860647,19.9,22.89,44.0,44.0,18.48,20.855435,19.9,23.28
max,653005.0,292.0,292.0,159.9,174.2975,169.9,219.99,117.0,115.0,108.48,108.48,108.48,108.48,81.0,81.0,107.4,107.4,107.4,107.4


In [10]:
# Same summary for the warehouse benchmark, for side-by-side comparison with the InfoPrice one.
bench_fru.describe()

Unnamed: 0,source_id,POINTS Med ALL,num_competitors_all,min_price_all,avg_price_all,Med ALL,max_price_all,POINTS Med Assai,num_competitors_assai,min_price_assai,avg_price_assai,Med Assai,max_price_assai,POINTS Med Atacadao,num_competitors_atacadao,min_price_atacadao,avg_price_atacadao,Med Atacadao,max_price_atacadao
count,1582.0,1582.0,1582.0,1582.0,1582.0,1582.0,1582.0,1401.0,1401.0,1401.0,1401.0,1401.0,1401.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0
mean,314012.654235,12.537927,12.501264,16.885891,18.616131,18.623758,20.369311,4.173448,4.159172,14.929058,15.523163,15.487288,16.153383,3.973999,3.970485,16.361687,17.076025,17.109993,17.738089
std,195529.967763,5.423683,5.38719,29.858394,31.442955,31.505327,32.916991,1.379662,1.363165,23.604895,24.475192,24.40631,25.517095,1.542196,1.538708,28.023581,28.6332,28.787245,29.322269
min,72329.0,1.0,1.0,0.69,0.856667,0.84,0.89,1.0,1.0,0.89,1.161667,1.02,1.59,1.0,1.0,0.79,1.43,1.59,1.59
25%,153089.0,9.0,9.0,3.39,3.81375,3.89,4.19,4.0,4.0,3.75,3.902,3.89,3.99,3.0,3.0,3.72,3.879,3.89,3.99
50%,278348.0,14.0,14.0,5.95,6.734118,6.79,7.49,5.0,4.0,6.09,6.15,6.15,6.59,4.0,4.0,5.99,6.085,6.05,6.69
75%,452047.0,16.0,16.0,18.79,21.973333,20.6225,26.08,5.0,5.0,19.49,19.9,19.49,19.9,5.0,5.0,18.9,20.19,19.92,21.79
max,653005.0,32.0,32.0,154.9,156.566667,154.9,169.9,10.0,10.0,152.9,152.9,152.9,159.0,10.0,10.0,159.9,159.9,159.9,159.9


In [11]:
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interact

# Tag each benchmark table with its data provider so the rows stay
# distinguishable after concatenation. Note that this adds the 'company'
# column to bench_fru and bench_info in place.
bench_fru['company'] = 'Frubana'
bench_info['company'] = 'Infoprice'

# Stack both tables into the single frame that plot_price_trends reads.
df = pd.concat([bench_fru, bench_info])

# Function to plot the data for a selected source_id and metric
def plot_price_trends(source_id, metric):
    """Plot one benchmark metric over time for a product, one line per provider.

    Reads two notebook-level frames: ``df`` (benchmark statistics with
    ``source_id``, ``quotation_date``, ``company`` and the metric columns) and
    ``df_join`` (product lookup with ``source_id`` and ``nombre``).

    Parameters
    ----------
    source_id
        Product identifier to plot; rows of ``df`` are matched with ``==``.
    metric : str
        Name of the column of ``df`` to draw on the y axis.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    product_rows = df[df['source_id'] == source_id]

    # The selectable ids come from the benchmark data, so an id may have no row
    # in the lookup sheet; fall back to a placeholder instead of raising
    # IndexError on ``.values[0]``.
    matching_names = df_join.loc[df_join.source_id == source_id, 'nombre'].values
    product_name = matching_names[0] if len(matching_names) else 'Unknown product'

    fig = go.Figure()

    # One line per data provider, always in the same colour.
    for company_name, color in [('Frubana', 'blue'), ('Infoprice', 'orange')]:
        company_df = product_rows[product_rows['company'] == company_name]
        fig.add_trace(go.Scatter(
            x=company_df['quotation_date'],
            y=company_df[metric],
            mode='lines',
            name=company_name,
            line=dict(color=color)
        ))

    fig.update_layout(
        title=f'Price Trends for {product_name} {source_id} ({metric})',
        xaxis_title='Quotation Date',
        yaxis_title=f'Median Price ({metric})',
        showlegend=True,
        template='plotly_white'
    )

    fig.show()

# Products present in either benchmark, in order of first appearance in df
source_ids = df['source_id'].unique()

# Dropdown to choose the product to plot
source_id_dropdown = widgets.Dropdown(
    options=source_ids,
    description='Source ID:',
    value=source_ids[0]  # Default to the first source_id
)

# Dropdown to choose which median series to plot
metric_dropdown = widgets.Dropdown(
    options=['Med ALL', 'Med Assai', 'Med Atacadao'],
    description='Metric:',
    value='Med ALL'  # Default to 'Med ALL'
)

# interact() displays the widgets itself and returns the wrapped function; the
# trailing semicolon keeps that function's repr out of the cell output.
interact(plot_price_trends, source_id=source_id_dropdown, metric=metric_dropdown);

interactive(children=(Dropdown(description='Source ID:', options=(72329.0, 72488.0, 72497.0, 73456.0, 73557.0,…

<function __main__.plot_price_trends(source_id, metric)>

In [12]:
# For product 588033, compare per data provider how many data points back each median
# (distribution of the 'POINTS ...' counts across dates); head(60) only caps the displayed rows.
df.loc[df.source_id == 588033,['source_id','company','POINTS Med ALL','POINTS Med Assai','POINTS Med Atacadao']].groupby(['source_id','company']).describe().head(60)

Unnamed: 0_level_0,Unnamed: 1_level_0,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med ALL,POINTS Med Assai,POINTS Med Assai,POINTS Med Assai,POINTS Med Assai,POINTS Med Assai,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao,POINTS Med Atacadao
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
source_id,company,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
588033.0,Frubana,39.0,15.820513,2.73257,5.0,16.0,16.0,17.0,22.0,39.0,4.820513,...,5.0,7.0,39.0,4.076923,0.899843,1.0,4.0,4.0,4.0,6.0
588033.0,Infoprice,34.0,3.558824,0.746352,1.0,3.0,4.0,4.0,4.0,34.0,2.147059,...,2.75,3.0,0.0,,,,,,,
