In [68]:
!pip install matplotlib
!pip install seaborn

Defaulting to user installation because normal site-packages is not writeable
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [69]:
from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPIError
from datetime import datetime
import pandas as pd
import warnings
from utils import format_bytes
from utils import format_bytes, map_bq_to_pd_types, safe_to_datetime

In [70]:
def check_client_connection(client):
    """
    Verifica se o cliente do BigQuery está conectado e pode acessar o serviço corretamente.
    
    Args:
        client (bigquery.Client): Uma instância do cliente BigQuery.
    
    Returns:
        bool: Retorna True se o cliente estiver conectado, False caso contrário.
    """
    try:
        # Tenta listar os datasets disponíveis no projeto para verificar a conexão
        _ = list(client.list_datasets())
        return True
    except GoogleAPIError as e:
        print(f"Erro de conexão: {e}")
        return False

In [71]:
def config_client(project_id):
    project_id = project_id.lower()
    if project_id == 'prod' or project_id == 'petlove-dataeng-prod-01':
        project_id = 'petlove-dataeng-prod-01'
    elif project_id == 'stag' or  project_id == 'petlove-dataeng-stag-01' or project_id == 'staging':
        project_id = 'petlove-dataeng-stag-01'
    else:
        raise ValueError("Este é um nome de projeto inválido!")
    
    client = bigquery.Client(project=project_id)
    return client

In [73]:
# Criar uma instância do cliente BigQuery
client = bigquery.Client()

# Chamar a função para verificar a conexão
if check_client_connection(client):
    print("Cliente conectado corretamente ao BigQuery.")
else:
    print("Falha ao conectar o cliente ao BigQuery.")



Cliente conectado corretamente ao BigQuery.


In [74]:
project_id = 'petlove-dataeng-prod-01'  # Substitua pelo ID do projeto desejado

try:
    client = config_client(project_id)
    print(f"Cliente BigQuery configurado para o projeto {project_id}")
except ValueError as ve:
    print(f"Erro ao configurar o cliente BigQuery: {ve}")
    # Trate o erro conforme necessário



Cliente BigQuery configurado para o projeto petlove-dataeng-prod-01


In [76]:
def query_bytes(query, client):
    """
    Calcula o número de bytes que seriam processados pela execução de uma consulta SQL.
    Útil para estimar o custo de uma consulta no BigQuery antes de executá-la.
    
    Args:
        query (str): A consulta SQL para ser executada.
        client (bigquery.Client): Uma instância do cliente BigQuery.

    Returns:
        int: Número de bytes que a consulta processaria se fosse executada.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        if not check_client_connection(client):
            return "Cliente não conectado. Verifique as configurações e credenciais do cliente."
        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
        query_job = client.query(query, job_config=job_config)
        bytes_processed = query_job.total_bytes_processed

        if bytes_processed > 1 * 1024**3:  # 1GB em bytes
            print("Quer matar o Cesar????")

    return format_bytes(bytes_processed)

In [77]:
def run_query(query, client):
    """
    Executa uma consulta SQL no BigQuery e retorna os resultados como um DataFrame.
    
    Args:
        query (str): A consulta SQL para ser executada.
        client (bigquery.Client): Uma instância do cliente BigQuery.
    
    Returns:
        pd.DataFrame: Um DataFrame contendo os resultados da consulta.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        try:

            print("Esta consulta processará {}.".format(query_bytes(query, client)))
            flag_execution = input("Deseja executar a consulta? (Y/N)")
            flag_execution = flag_execution.lower()
            df = None

            if  flag_execution == 'y':
                print("Iniciando query")
                
                start_time = datetime.now()
                print("Iniciando a consulta:", start_time.strftime('%Y-%m-%d %H:%M:%S'))

                job = client.query(query)
                result = job.result()

                # Obter o schema da consulta
                schema = result.schema
                # Mapear tipos de dados do BigQuery para pandas
                dtype_mapping = {field.name: map_bq_to_pd_types(field.field_type) for field in schema}           
                # Converter o resultado para DataFrame
                df = result.to_dataframe()           
                # Aplicar os tipos de dados ao DataFrame
                for col, dtype in dtype_mapping.items():
                    if dtype == 'datetime64[ns]':
                        df[col] = safe_to_datetime(df[col])
                    else:
                        df[col] = df[col].astype(dtype)
                
                # Registrar o tempo de término da consulta
                end_time = datetime.now()
                print("Consulta concluída:", end_time.strftime('%Y-%m-%d %H:%M:%S'))

                # Calcular a duração
                duration = end_time - start_time
                print(f"Duração da consulta: {duration}")

            else:
                print("Consulta cancelada pelo usuário. Cesar agradece!")

        except Exception as e:
            print(f'Erro ao converter os dados da consulta para DataFrame: {e}')
            df = None  # Atribua None ou maneje de outra forma dependendo do seu caso de uso
        return df

### 1. Consulta previsões stata e spark

In [78]:
query = """
   with
base_py as (
  select distinct	
    py.data,
    py.sku,
    py.filial,
    sum (py.previsao_final) as previsao_py,
    py.melhor_modelo as modelo_py,
    py.refdate as refdate_py
  from `op_octopus.previsoes_analiticas_final` py
  where	1=1
    and extract (month from	py.data) = extract (month from current_date)
    and py.previsao_final > 0
    and py.data <= current_date-1
    and py.refdate = (
      select
        max(py.refdate)
      from `op_octopus.previsoes_analiticas_final` py
      where 1=1
        and	extract (month from py.refdate) = extract (month from current_date-31)
    )
  group by
    1, 2, 3, 5, 6
),
base_st as (
  select distinct	
    st.data,
    st.sku,
    st.filial,
    sum (st.previsao) as previsao_st,
    st.modelo as modelo_st,
    st.refdate as refdate_st
  from `supply_chain_analytics.previsoes_sku_filial_versionado` st
  where	1=1
    and extract (month from	st.data) = extract (month from current_date)
    and st.previsao > 0
    and st.data <= current_date-1
    and st.refdate = (
      select
        max(st.refdate)
      from `supply_chain_analytics.previsoes_sku_filial_versionado` st
      where 1=1
        and	extract (month from st.refdate) = extract (month from current_date-31)
    )
  group by
    1, 2, 3, 5, 6
),
base1 as (
  select distinct
    data,
    sku,
    filial
  from (
    select distinct
      data,
      sku,
      filial
    from base_py
    union all
    select distinct
      data,
      sku,
      filial
    from base_st
  )
),
base2 as (
  select
    extract (year from b1.data) as ano,
    extract (month from	b1.data) as mes,
    b1.data,
    b1.sku,
    dp.erp_setor,
    b1.filial as nm_filial,
    py.modelo_py,
    py.previsao_py,
    py.refdate_py,
    st.modelo_st,
    st.previsao_st,
    st.refdate_st
  from base1 b1
  left join base_py py
    on py.data = b1.data
    and py.sku = b1.sku
    and py.filial = b1.filial
  left join base_st st
    on st.data = b1.data
    and st.sku = b1.sku
    and st.filial = b1.filial
  left join `dw_corporativo.dim_produto` dp
    on dp.sku = b1.sku
)
select * from base2
"""


In [79]:
result = query_bytes(query, client)
print(f"Bytes processados: {result}")

Quer matar o Cesar????
Bytes processados: 68.72 GB


In [80]:
# Executar a consulta usando a função run_query
consulta=run_query(query, client)

Quer matar o Cesar????
Esta consulta processará 68.72 GB.


Deseja executar a consulta? (Y/N) y


Iniciando query
Iniciando a consulta: 2024-07-16 14:12:33
Consulta concluída: 2024-07-16 14:14:11
Duração da consulta: 0:01:38.020405


In [81]:
consulta.head()

Unnamed: 0,ano,mes,data,sku,erp_setor,nm_filial,modelo_py,previsao_py,refdate_py,modelo_st,previsao_st,refdate_st
0,2024,7,2024-07-01,2637140,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.035714,2024-06-18
1,2024,7,2024-07-01,2775341,Alimentos,Itajai-SC,,,NaT,mape_vmd_15,0.001,2024-06-18
2,2024,7,2024-07-01,31027531307,Acessórios,Loja WLUI-SP,vmd,0.032787,2024-06-25,mape_nulo,0.021978,2024-06-18
3,2024,7,2024-07-01,3110416-1,Alimentos,Minihub-CTBA,vmd,0.554286,2024-06-25,mape_vmd_180,0.571429,2024-06-18
4,2024,7,2024-07-01,1591607,Medicina e Bem Estar,Recife-PE,,,NaT,mape_vmd_30,0.129032,2024-06-18


In [153]:
consulta[(consulta['sku'] == '31021124-3') & (consulta['nm_filial'] != 'Extrema-MG')]

Unnamed: 0,ano,mes,data,sku,erp_setor,nm_filial,modelo_py,previsao_py,refdate_py,modelo_st,previsao_st,refdate_st
3802,2024,7,2024-07-01,31021124-3,Acessórios,Recife-PE,linear_regression,0.074609,2024-06-25,mape_vmd_30,0.032258,2024-06-18
45197,2024,7,2024-07-01,31021124-3,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.022099,2024-06-18
62351,2024,7,2024-07-02,31021124-3,Acessórios,Recife-PE,linear_regression,0.074609,2024-06-25,mape_vmd_30,0.032258,2024-06-18
104652,2024,7,2024-07-02,31021124-3,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.022099,2024-06-18
139914,2024,7,2024-07-03,31021124-3,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.022099,2024-06-18
167894,2024,7,2024-07-03,31021124-3,Acessórios,Recife-PE,linear_regression,0.074609,2024-06-25,mape_vmd_30,0.032258,2024-06-18
187036,2024,7,2024-07-04,31021124-3,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.022099,2024-06-18
235068,2024,7,2024-07-04,31021124-3,Acessórios,Recife-PE,linear_regression,0.202467,2024-06-25,mape_vmd_30,0.032258,2024-06-18
259869,2024,7,2024-07-05,31021124-3,Acessórios,Hidrolandia-GO,,,NaT,mape_nulo,0.066298,2024-06-18
285114,2024,7,2024-07-05,31021124-3,Acessórios,Recife-PE,,,NaT,mape_vmd_30,0.096774,2024-06-18


In [82]:
# Obter o mês inicial e final
mes_inicial = consulta['mes'].min()
mes_final = consulta['mes'].max()

print(f'Mês inicial: {mes_inicial}')
print(f'Mês final: {mes_final}')

Mês inicial: 7
Mês final: 7


### 2. Obter qtidade realizada, receita realizada

In [90]:
query_faturado = """
select
    dim_produto.sku,
    dim_tempo.chv_tempo as data,
    dim_filial.nm_filial,
    extract(month from dim_tempo.chv_tempo) as cod_mes,
    sum(ft_pedido_faturado.quantidade_faturada) as Qtde_Fat_Real,
    sum(ft_pedido_faturado.receita_bruta_produto) as Receita_Real,
from
    dw_corporativo.ft_pedido_faturado
join dw_corporativo.dim_produto
    on ft_pedido_faturado.chv_produto = dim_produto.chv_produto
join dw_corporativo.dim_tempo
    on dim_tempo.chv_tempo = ft_pedido_faturado.chv_data_captado
join dw_corporativo.dim_filial
    on dim_filial.chv_filial = ft_pedido_faturado.chv_filial
where
    dim_tempo.chv_tempo >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
    and dim_tempo.chv_tempo <= DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
group by 1, 2, 3, 4
"""

In [91]:
result_faturado = query_bytes(query_faturado, client)
print(f"Bytes processados: {result_faturado}")

Quer matar o Cesar????
Bytes processados: 4.91 GB


In [92]:
# Executar a consulta usando a função run_query
faturado=run_query(query_faturado, client)

Quer matar o Cesar????
Esta consulta processará 4.91 GB.


Deseja executar a consulta? (Y/N) y


Iniciando query
Iniciando a consulta: 2024-07-16 14:25:23
Consulta concluída: 2024-07-16 14:25:44
Duração da consulta: 0:00:21.515550


In [93]:
faturado.head()

Unnamed: 0,sku,data,nm_filial,cod_mes,Qtde_Fat_Real,Receita_Real
0,2340357,2024-06-16,CD Extrema,6,12.0,42.87
1,3109843-2,2024-06-16,CD Extrema,6,1.0,151.39
2,3102751465,2024-06-16,CD Extrema,6,48.0,151.48
3,2491761,2024-06-16,CD Extrema,6,2.0,81.53
4,2559488,2024-06-16,CD Extrema,6,1.0,128.91


In [94]:
faturado_julho=faturado[faturado['cod_mes'] == 7]
faturado_julho.head()

Unnamed: 0,sku,data,nm_filial,cod_mes,Qtde_Fat_Real,Receita_Real
118139,1571800,2024-07-01,CD Extrema,7,1.0,246.34
118140,2156402,2024-07-01,CD Extrema,7,3.0,1098.51
118141,2502552,2024-07-01,CD Extrema,7,7.0,692.3
118142,2638255,2024-07-01,CD Cabo de Santo Agostinho,7,3.0,604.57
118143,2664145,2024-07-01,Petsupermarket Com Prod para Animais SA- Itaja...,7,1.0,116.99


In [154]:
faturado_julho[(faturado_julho['sku'] == '31021124-3') & (faturado_julho['nm_filial'] != 'Extrema-MG')]

Unnamed: 0,sku,data,cod_mes,Qtde_Fat_Real,Receita_Real,nm_filial


### 3. Nomes das filiais estão diferentes da saida octopus e dim_filial

In [95]:
filiais = pd.read_csv('filiais.csv')
filiais.head()

Unnamed: 0,nm_filial,nm_filial_octopus,tipo_filial
0,CD Extrema,Extrema-MG,CD
1,CD Cabo de Santo Agostinho,Recife-PE,CD
2,Petsupermarket Com Prod para Animais SA- Itaja...,Itajai-SC,CD
3,Petsupermarket Com Prod para Animais SA - GO,Hidrolandia-GO,CD
4,CD Minihub Goiânia,Minihub-GO,JÁ


In [96]:
faturado_julho = pd.merge(faturado_julho, filiais, on='nm_filial', how='left')
faturado_julho.head()

Unnamed: 0,sku,data,nm_filial,cod_mes,Qtde_Fat_Real,Receita_Real,nm_filial_octopus,tipo_filial
0,1571800,2024-07-01,CD Extrema,7,1.0,246.34,Extrema-MG,CD
1,2156402,2024-07-01,CD Extrema,7,3.0,1098.51,Extrema-MG,CD
2,2502552,2024-07-01,CD Extrema,7,7.0,692.3,Extrema-MG,CD
3,2638255,2024-07-01,CD Cabo de Santo Agostinho,7,3.0,604.57,Recife-PE,CD
4,2664145,2024-07-01,Petsupermarket Com Prod para Animais SA- Itaja...,7,1.0,116.99,Itajai-SC,CD


In [97]:
faturado_julho = (faturado_julho
                           .drop(['nm_filial', 'tipo_filial'], axis=1)
                           .rename(columns={'nm_filial_octopus': 'nm_filial'}))
faturado_julho.head()

Unnamed: 0,sku,data,cod_mes,Qtde_Fat_Real,Receita_Real,nm_filial
0,1571800,2024-07-01,7,1.0,246.34,Extrema-MG
1,2156402,2024-07-01,7,3.0,1098.51,Extrema-MG
2,2502552,2024-07-01,7,7.0,692.3,Extrema-MG
3,2638255,2024-07-01,7,3.0,604.57,Recife-PE
4,2664145,2024-07-01,7,1.0,116.99,Itajai-SC


### 4. Juntar as informações de faturado_julho com consulta

In [232]:
df = pd.merge(consulta, faturado_julho,
                                    on=['sku', 'data', 'nm_filial'], how='inner')
df.sort_values(by=['sku', 'data','nm_filial'], ascending=[False, True, False]).head(130)

Unnamed: 0,ano,mes,data,sku,erp_setor,nm_filial,modelo_py,previsao_py,refdate_py,modelo_st,previsao_st,refdate_st,cod_mes,Qtde_Fat_Real,Receita_Real
6361,2024,7,2024-07-01,319667-1,Alimentos,Recife-PE,linear_regression,1.328233,2024-06-25,SEMANAL,2.850940,2024-06-18,7,4.0,964.44
282,2024,7,2024-07-01,319667-1,Alimentos,Minihub-SP,linear_regression,0.178667,2024-06-25,mape_vmd_30,0.032258,2024-06-18,7,1.0,241.11
4575,2024,7,2024-07-01,319667-1,Alimentos,Itajai-SC,linear_regression,4.214826,2024-06-25,DIARIO,5.549957,2024-06-18,7,4.0,991.21
7838,2024,7,2024-07-01,319667-1,Alimentos,Extrema-MG,linear_regression,14.898814,2024-06-25,DIARIO,19.267723,2024-06-18,7,21.0,5021.54
12396,2024,7,2024-07-02,319667-1,Alimentos,Recife-PE,linear_regression,1.328233,2024-06-25,SEMANAL,2.850940,2024-06-18,7,2.0,482.21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31360,2024,7,2024-07-04,316528,Higiene e Beleza,Recife-PE,linear_regression,0.630113,2024-06-25,mape_vmd_60,0.491803,2024-06-18,7,1.0,26.08
27121,2024,7,2024-07-04,316528,Higiene e Beleza,Extrema-MG,linear_regression,6.494594,2024-06-25,DIARIO,3.913981,2024-06-18,7,2.0,55.07
42858,2024,7,2024-07-05,316528,Higiene e Beleza,Extrema-MG,,,NaT,DIARIO,5.531664,2024-06-18,7,1.0,26.85
47437,2024,7,2024-07-07,316528,Higiene e Beleza,Extrema-MG,linear_regression,1.791659,2024-06-25,,,NaT,7,4.0,101.46


In [189]:
df.shape

(101398, 15)

In [190]:
# Qtas linhas têm a combinação única de sku e nm_filial
num_linhas = df[['sku', 'nm_filial']].drop_duplicates().shape[0]
print("Número de linhas com combinação única de sku e nm_filial:", num_linhas)

Número de linhas com combinação única de sku e nm_filial: 24931


### 5.1. Qtos SKUs tiveram previsão Stata mas não no python?

In [182]:
# Filtrar os SKUs que têm previsão no modelo_st e não têm previsão no modelo_py
condicao = (df['modelo_st'].notna()) & (df['modelo_py'].isna())

# Obter os SKUs que satisfazem a condição
skus_sem_previsao_py = df.loc[condicao, 'sku'].unique()

print("SKUs que têm previsão no modelo_st mas não têm previsão no modelo_py:")
print(skus_sem_previsao_py)

SKUs que têm previsão no modelo_st mas não têm previsão no modelo_py:
['3102751483' '31027531411' '1969004' ... '1651164' '31027527553'
 '3102296']


In [183]:
# Obter os SKUs e nm_filial que satisfazem a condição
skus_sem_previsao_py1 = df.loc[condicao, ['sku', 'nm_filial']].drop_duplicates()

print("SKUs/nm_filial que têm previsão no modelo_st mas não têm previsão no modelo_py:")
print(skus_sem_previsao_py1)

SKUs/nm_filial que têm previsão no modelo_st mas não têm previsão no modelo_py:
                sku       nm_filial
26       3102751483      Extrema-MG
32      31027531411      Extrema-MG
42          1969004       Itajai-SC
44          2419815      Minihub-SP
57          2490069      Extrema-MG
...             ...             ...
101170      1651164  Hidrolandia-GO
101186  31027527553      Extrema-MG
101189      3102296      Extrema-MG
101285  31027528647       Itajai-SC
101326   31024424-2      Minihub-SP

[12991 rows x 2 columns]


In [202]:
# Salvar DataFrame para um arquivo CSV
skus_sem_previsao_py1.to_csv('skus_com_previsao_stata_sem_previsao_python.csv', index=False)

In [191]:
num_linhas1 = df.loc[condicao, ['sku', 'nm_filial']].drop_duplicates().shape[0]
print("Número de linhas com combinação única de sku e nm_filial:", num_linhas1)

Número de linhas com combinação única de sku e nm_filial: 12991


In [194]:
# Calcular a proporção
proporcao = num_linhas1 / num_linhas
print("Proporção de linhas que satisfazem a condição:", proporcao)

Proporção de linhas que satisfazem a condição: 0.5210781757651117


In [201]:
# Verificar se o SKU '316528' na 'Extrema-MG' está presente na lista
resultado_esperado = skus_sem_previsao_py1[(skus_sem_previsao_py1['sku'] == '316528') & (skus_sem_previsao_py1['nm_filial'] == 'Extrema-MG')]

print("Resultado encontrado:")
print(resultado_esperado)

Resultado encontrado:
          sku   nm_filial
42858  316528  Extrema-MG


### 5.2. Olhando para somente as previsoes times_series no modelo_py e DIARIO ou SEMANAL no modelo_st1

In [101]:
# Filtrando para time_series em modelo_py e DIARIO ou SEMANAL em modelo_st
filtro = (
    (df['modelo_py'].isin(['time_series'])) &
    (df['modelo_st'].isin(['DIARIO', 'SEMANAL']))
)

# Aplicando o filtro e selecionando os campos desejados
resultado = df.loc[filtro, ['sku', 'modelo_py', 'modelo_st', 'nm_filial', 'previsao_st','previsao_py', 'Qtde_Fat_Real', 'Receita_Real']].copy()
resultado.head()

Unnamed: 0,sku,modelo_py,modelo_st,nm_filial,previsao_st,previsao_py,Qtde_Fat_Real,Receita_Real
118,31014070-1,time_series,DIARIO,Hidrolandia-GO,4.737158,3.5082,2.0,269.84
324,2492168,time_series,DIARIO,Extrema-MG,11.807384,9.5772,4.0,514.25
433,3110176-2,time_series,DIARIO,Extrema-MG,27.004519,32.0317,39.0,5021.85
527,2492167,time_series,DIARIO,Extrema-MG,3.086389,6.6475,4.0,327.59
583,2492356,time_series,DIARIO,Hidrolandia-GO,5.433556,5.5808,2.0,85.4


In [102]:
len(resultado['sku'].unique())

71

In [114]:
# Agrupando por sku e filial e calculando a média
df_media = resultado.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,1542571,Hidrolandia-GO,time_series,SEMANAL,3.186205,5.795400,5.500000
1,1542573,Extrema-MG,time_series,DIARIO,20.282679,56.697100,24.125000
2,1559763,Extrema-MG,time_series,DIARIO,10.391449,33.591433,11.666667
3,1559763,Recife-PE,time_series,DIARIO,3.835912,9.107950,2.750000
4,1648959,Extrema-MG,time_series,DIARIO,5.109772,9.928489,6.111111
...,...,...,...,...,...,...,...
83,3110550-1,Extrema-MG,time_series,DIARIO,7.186771,10.768778,7.111111
84,311226-2,Recife-PE,time_series,DIARIO,4.538908,3.746175,3.000000
85,311733-1,Extrema-MG,time_series,DIARIO,7.784586,9.054522,8.000000
86,311810-1,Extrema-MG,time_series,DIARIO,9.628620,14.702378,6.666667


In [203]:
# Calcular a diferença absoluta entre previsao_st e previsao_py em cada linha
df_media['diff_abs'] = abs(df_media['previsao_st'] - df_media['previsao_py'])

In [204]:
# Ordenar o DataFrame pela coluna diff_abs em ordem decrescente
df_media_sorted = df_media.sort_values(by='diff_abs', ascending=False)
df_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
27,2492358,Extrema-MG,time_series,DIARIO,91.757735,160.289022,98.444444,68.531288
25,2492356,Extrema-MG,time_series,DIARIO,39.336562,88.860133,64.555556,49.523571
1,1542573,Extrema-MG,time_series,DIARIO,20.282679,56.697100,24.125000,36.414421
5,1743118,Extrema-MG,time_series,DIARIO,25.532255,51.364478,43.777778,25.832223
14,2031745,Extrema-MG,time_series,DIARIO,48.250765,72.590722,38.777778,24.339957
...,...,...,...,...,...,...,...,...
34,2648837,Extrema-MG,time_series,DIARIO,10.965248,10.668044,15.888889,0.297203
76,3109181-2,Extrema-MG,time_series,DIARIO,4.332568,4.520767,3.833333,0.188199
30,2536244,Extrema-MG,time_series,DIARIO,7.212332,7.178044,5.777778,0.034287
78,3110172-2,Itajai-SC,time_series,DIARIO,5.427811,5.404943,3.714286,0.022868


#### A partir daqui análises considerando os SKUs mais importantes (top 10 SKUs) com base na receita de 180 dias  

### 6. Preço historico 180 dias

In [129]:
query_preco_historico = """
select
    dp.sku,
    df.nm_filial,
    sum(fpf.quantidade) as quantidade,
    sum(fpf.receita_bruta_produto) as receita,
    sum(fpf.receita_bruta_produto)/sum(fpf.quantidade) as preco_medio
  from `petlove-dataeng-prod-01.dw_corporativo.ft_pedido_faturado` fpf
  left join `petlove-dataeng-prod-01.dw_corporativo.dim_produto` dp
    on dp.chv_produto = fpf.chv_produto
  left join `petlove-dataeng-prod-01.dw_corporativo.dim_filial` df
    on df.chv_filial = fpf.chv_filial
  where 1=1
    and fpf.chv_data_emissao_nota_fiscal between current_date-181 and current_date-1
  group by 1, 2
"""  

In [130]:
result_preco_historico = query_bytes(query_preco_historico, client)
print(f"Bytes processados: {result_preco_historico}")

Bytes processados: 563.77 MB


In [131]:
# Executar a consulta usando a função run_query
preco_historico=run_query(query_preco_historico, client)

Esta consulta processará 563.77 MB.


Deseja executar a consulta? (Y/N) y


Iniciando query
Iniciando a consulta: 2024-07-16 16:13:53
Consulta concluída: 2024-07-16 16:14:00
Duração da consulta: 0:00:07.179320


### Preço historico 180 dias

In [132]:
preco_historico.head()

Unnamed: 0,sku,nm_filial,quantidade,receita,preco_medio
0,3110184-1,CD Extrema,29441.0,164988.85,5.60405
1,2032068,CD Extrema,120286.0,343993.72,2.859798
2,31024421-2,CD Extrema,1465.0,246782.89,168.452485
3,2753635,CD Extrema,2603.0,22743.66,8.73748
4,31021721-2,CD Extrema,2627.0,134585.49,51.231629


In [133]:
preco_historico = pd.merge(preco_historico, filiais, on='nm_filial', how='left')
preco_historico.head()

Unnamed: 0,sku,nm_filial,quantidade,receita,preco_medio,nm_filial_octopus,tipo_filial
0,3110184-1,CD Extrema,29441.0,164988.85,5.60405,Extrema-MG,CD
1,2032068,CD Extrema,120286.0,343993.72,2.859798,Extrema-MG,CD
2,31024421-2,CD Extrema,1465.0,246782.89,168.452485,Extrema-MG,CD
3,2753635,CD Extrema,2603.0,22743.66,8.73748,Extrema-MG,CD
4,31021721-2,CD Extrema,2627.0,134585.49,51.231629,Extrema-MG,CD


In [134]:
preco_historico = (preco_historico
                           .drop(['nm_filial', 'tipo_filial'], axis=1)
                           .rename(columns={'nm_filial_octopus': 'nm_filial'}))
preco_historico.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
0,3110184-1,29441.0,164988.85,5.60405,Extrema-MG
1,2032068,120286.0,343993.72,2.859798,Extrema-MG
2,31024421-2,1465.0,246782.89,168.452485,Extrema-MG
3,2753635,2603.0,22743.66,8.73748,Extrema-MG
4,31021721-2,2627.0,134585.49,51.231629,Extrema-MG


In [170]:
preco_historico['nm_filial'].unique()

array(['Extrema-MG', 'Minihub-CTBA', 'Minihub-POA', 'Minihub-SP',
       'Minihub-BH', 'Minihub-RJ', 'Recife-PE', 'Hidrolandia-GO',
       'Itajai-SC'], dtype=object)

In [135]:
# Obter os 10 SKUs com maior receita
df_top_receita = preco_historico.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita

Unnamed: 0,sku,nm_filial,receita
7594,31027526344,Extrema-MG,6044616.39
61,31014070-2,Extrema-MG,5008430.78
2551,31027526341,Extrema-MG,4031455.42
6800,2492303,Extrema-MG,3667865.56
5097,31017079-3,Extrema-MG,2950629.1
9297,31022435-3,Extrema-MG,2800778.11
8429,2406968,Extrema-MG,2625621.28
9258,31021721-3,Extrema-MG,2316347.66
13645,3105886,Extrema-MG,2142409.07
4253,310001-3,Extrema-MG,2041894.73


In [136]:
# Obter os 10 SKUs com maior preco medio
df_top_preco_medio = preco_historico.nlargest(10, 'preco_medio')[['sku', 'nm_filial', 'preco_medio']]
df_top_preco_medio

Unnamed: 0,sku,nm_filial,preco_medio
6727,2774067,Extrema-MG,1799.65
42471,31021124-3,Hidrolandia-GO,1461.725
37209,31021124-3,Recife-PE,1413.973333
12567,31021124-2,Extrema-MG,1331.8775
9766,2698120,Extrema-MG,1326.925517
5456,31021124-3,Extrema-MG,1324.487778
37591,31021124-2,Recife-PE,1297.116667
54738,31021124-4,Itajai-SC,1179.905
41241,2698119,Hidrolandia-GO,1168.763333
35826,31021124-4,Recife-PE,1128.315


### Como foram as previsões para os 10 SKUs com maior receita (180 dias)?

In [139]:
df_1 = pd.merge(df, df_top_receita,
                                    on=['sku', 'nm_filial'], how='inner')

In [143]:
df_1 = df_1.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_1.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,31027526341,Higiene e Beleza,Extrema-MG,linear_regression,400.194762,DIARIO,247.059799,775.0,40972.06,4031455.42
1,31027526341,Higiene e Beleza,Extrema-MG,linear_regression,400.194762,DIARIO,231.713745,599.0,31491.82,4031455.42
2,31027526341,Higiene e Beleza,Extrema-MG,linear_regression,400.194762,DIARIO,234.185638,555.0,29863.49,4031455.42
3,31027526341,Higiene e Beleza,Extrema-MG,linear_regression,1171.302791,DIARIO,236.684006,1164.0,45831.36,4031455.42
4,31027526341,Higiene e Beleza,Extrema-MG,,,DIARIO,642.590149,877.0,36828.54,4031455.42


In [146]:
# Agrupando por sku e filial e calculando a média
df_1_media = df_1.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_1_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2406968,Extrema-MG,linear_regression,DIARIO,311.578867,488.194267,233.307692
1,2492303,Extrema-MG,linear_regression,mape_vmd_15,167.471591,180.020846,126.230769
2,310001-3,Extrema-MG,linear_regression,DIARIO,128.615637,126.705364,89.0
3,31014070-2,Extrema-MG,linear_regression,DIARIO,242.399647,224.045794,175.461538
4,31017079-3,Extrema-MG,linear_regression,DIARIO,175.376412,154.810158,123.307692
5,31021721-3,Extrema-MG,vmd,DIARIO,123.117181,105.0,92.846154
6,31022435-3,Extrema-MG,linear_regression,DIARIO,137.24826,141.929182,116.307692
7,31027526341,Extrema-MG,linear_regression,DIARIO,285.226456,532.783913,558.076923
8,31027526344,Extrema-MG,,DIARIO,668.908861,,834.818182
9,3105886,Extrema-MG,linear_regression,DIARIO,300.459552,455.790151,296.230769


### Ordenar os 10 SKUs com base na maior diferença entre as previsões 

In [205]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_1_media_sorted = df_1_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_1_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
7,31027526341,Extrema-MG,linear_regression,DIARIO,285.226456,532.783913,558.076923,247.557457
0,2406968,Extrema-MG,linear_regression,DIARIO,311.578867,488.194267,233.307692,176.6154
9,3105886,Extrema-MG,linear_regression,DIARIO,300.459552,455.790151,296.230769,155.330599
4,31017079-3,Extrema-MG,linear_regression,DIARIO,175.376412,154.810158,123.307692,20.566254
3,31014070-2,Extrema-MG,linear_regression,DIARIO,242.399647,224.045794,175.461538,18.353853
5,31021721-3,Extrema-MG,vmd,DIARIO,123.117181,105.0,92.846154,18.117181
1,2492303,Extrema-MG,linear_regression,mape_vmd_15,167.471591,180.020846,126.230769,12.549255
6,31022435-3,Extrema-MG,linear_regression,DIARIO,137.24826,141.929182,116.307692,4.680922
2,310001-3,Extrema-MG,linear_regression,DIARIO,128.615637,126.705364,89.0,1.910273
8,31027526344,Extrema-MG,,DIARIO,668.908861,,834.818182,


### Como foram as previsões para os 10 SKUs com maior preço médio (180 dias)?

In [141]:
df_2 = pd.merge(df, df_top_preco_medio,
                                    on=['sku', 'nm_filial'], how='inner')

In [152]:
df[(df['sku'] == '31021124-3') & (df['nm_filial'] != 'Extrema-MG')]

Unnamed: 0,ano,mes,data,sku,erp_setor,nm_filial,modelo_py,previsao_py,refdate_py,modelo_st,previsao_st,refdate_st,cod_mes,Qtde_Fat_Real,Receita_Real


In [145]:
df_2 = df_2.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'preco_medio']]
df_2.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,preco_medio
0,31021124-3,Acessórios,Extrema-MG,linear_regression,0.497826,mape_vmd_15,0.25,1.0,1383.3,1324.487778
1,31021124-3,Acessórios,Extrema-MG,linear_regression,0.172525,mape_vmd_15,0.25,1.0,1430.55,1324.487778
2,2698120,Medicina e Bem Estar,Extrema-MG,linear_regression,0.063717,mape_vmd_180,0.127072,1.0,1357.16,1326.925517


In [147]:
# Agrupando por sku e filial e calculando a média
df_2_media = df_2.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_2_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2698120,Extrema-MG,linear_regression,mape_vmd_180,0.127072,0.063717,1.0
1,31021124-3,Extrema-MG,linear_regression,mape_vmd_15,0.25,0.335176,1.0


### 7. Olhando SKUs com maior receita (180 dias) para cada filial 

### 7.1. Recife-PE

In [160]:
#Obter os SKUs com maior receita para Recife-PE
filtro_recife = preco_historico.loc[preco_historico['nm_filial'] == 'Recife-PE']
filtro_recife.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
27324,2021112,287.0,13502.47,47.046934,Recife-PE
27325,3110247-2,186.0,77683.52,417.653333,Recife-PE
27326,2498301,665.0,183528.39,275.982541,Recife-PE
27327,2678207,44.0,2189.8,49.768182,Recife-PE
27328,3128079,188.0,13448.01,71.531968,Recife-PE


In [161]:
# Obter os 10 SKUs para Recife-PE com maior receita
df_top_receita_recife = filtro_recife.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita_recife

Unnamed: 0,sku,nm_filial,receita
36502,31014070-2,Recife-PE,571431.53
29024,31022435-3,Recife-PE,533899.5
30184,31017079-3,Recife-PE,489444.69
27894,2492303,Recife-PE,474489.95
33573,31027526344,Recife-PE,425944.11
32465,3110240-3,Recife-PE,329963.39
33590,2498381,Recife-PE,329544.52
37048,2661128,Recife-PE,326950.02
32463,310001-3,Recife-PE,310088.96
35333,2536357,Recife-PE,242665.03


### Como foram as previsões para os 10 SKUs para Recife-PE com maior receita (180 dias)?

In [162]:
df_recife = pd.merge(df, df_top_receita_recife,
                                    on=['sku', 'nm_filial'], how='inner')

In [163]:
df_recife = df_recife.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_recife.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,31022435-3,Alimentos,Recife-PE,linear_regression,28.626669,DIARIO,35.364243,38.0,6132.62,533899.5
1,31022435-3,Alimentos,Recife-PE,linear_regression,28.626669,DIARIO,35.818089,26.0,4222.05,533899.5
2,31022435-3,Alimentos,Recife-PE,linear_regression,28.626669,DIARIO,34.353474,42.0,6705.64,533899.5
3,31022435-3,Alimentos,Recife-PE,linear_regression,79.255778,DIARIO,32.948746,20.0,3202.28,533899.5
4,31022435-3,Alimentos,Recife-PE,,,DIARIO,82.940475,35.0,5676.66,533899.5


In [164]:
# Agrupando por sku e filial e calculando a média
df_recife_media = df_recife.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_recife_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2492303,Recife-PE,linear_regression,mape_vmd_7,34.431818,38.93133,17.692308
1,2498381,Recife-PE,linear_regression,DIARIO,38.416695,13.65824,9.307692
2,2536357,Recife-PE,linear_regression,DIARIO,6.592587,6.125231,4.0
3,2661128,Recife-PE,linear_regression,mape_vmd_15,38.693182,29.4742,23.769231
4,310001-3,Recife-PE,linear_regression,DIARIO,26.181836,27.58078,12.307692
5,31014070-2,Recife-PE,vmd,DIARIO,55.326993,37.74194,16.307692
6,31017079-3,Recife-PE,vmd,DIARIO,43.96132,34.75073,22.615385
7,31022435-3,Recife-PE,linear_regression,DIARIO,41.261205,35.15971,24.461538
8,31027526344,Recife-PE,linear_regression,DIARIO,46.157007,1.125855e-10,68.454545
9,3110240-3,Recife-PE,linear_regression,DIARIO,27.852678,21.8932,13.692308


### Ordenar os 10 SKUs com base na maior diferença entre as previsões

In [206]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_recife_media_sorted = df_recife_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_recife_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
8,31027526344,Recife-PE,linear_regression,DIARIO,46.157007,1.125855e-10,68.454545,46.157007
1,2498381,Recife-PE,linear_regression,DIARIO,38.416695,13.65824,9.307692,24.758458
5,31014070-2,Recife-PE,vmd,DIARIO,55.326993,37.74194,16.307692,17.585057
3,2661128,Recife-PE,linear_regression,mape_vmd_15,38.693182,29.4742,23.769231,9.218978
6,31017079-3,Recife-PE,vmd,DIARIO,43.96132,34.75073,22.615385,9.210587
7,31022435-3,Recife-PE,linear_regression,DIARIO,41.261205,35.15971,24.461538,6.101495
9,3110240-3,Recife-PE,linear_regression,DIARIO,27.852678,21.8932,13.692308,5.959474
0,2492303,Recife-PE,linear_regression,mape_vmd_7,34.431818,38.93133,17.692308,4.499516
4,310001-3,Recife-PE,linear_regression,DIARIO,26.181836,27.58078,12.307692,1.398944
2,2536357,Recife-PE,linear_regression,DIARIO,6.592587,6.125231,4.0,0.467355


### 7.2. Hidrolandia-GO	

In [165]:
#Obter os SKUs com maior receita para Hidrolandia-GO
filtro_hidrolandia = preco_historico.loc[preco_historico['nm_filial'] == 'Hidrolandia-GO']
filtro_hidrolandia.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
37660,2724379,150.0,23018.45,153.456333,Hidrolandia-GO
37661,2637130,9.0,72.81,8.09,Hidrolandia-GO
37662,2492358,1914.0,25498.98,13.322351,Hidrolandia-GO
37663,31027524583,81.0,4379.25,54.064815,Hidrolandia-GO
37664,2684609,7.0,238.65,34.092857,Hidrolandia-GO


In [166]:
# Obter os 10 SKUs para Hidrolandia-GO com maior receita
df_top_receita_hidrolandia = filtro_hidrolandia.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita_hidrolandia

Unnamed: 0,sku,nm_filial,receita
39157,31027526344,Hidrolandia-GO,400034.74
41687,31014070-2,Hidrolandia-GO,242928.77
43768,31027526341,Hidrolandia-GO,237784.38
45307,2492303,Hidrolandia-GO,234334.56
44328,31017079-3,Hidrolandia-GO,202578.88
39639,31022435-3,Hidrolandia-GO,172431.83
43770,2406968,Hidrolandia-GO,163136.9
43211,2616989,Hidrolandia-GO,132655.64
45312,310001-3,Hidrolandia-GO,131213.77
38637,3105886,Hidrolandia-GO,130735.01


### Como foram as previsões para os 10 SKUs para Hidrolandia-GO com maior receita (180 dias)?¶

In [167]:
df_hidrolandia = pd.merge(df, df_top_receita_hidrolandia,
                                    on=['sku', 'nm_filial'], how='inner')

In [168]:
df_hidrolandia = df_hidrolandia.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_hidrolandia.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,2616989,Alimentos,Hidrolandia-GO,linear_regression,8.492536,DIARIO,6.053285,7.0,824.77,132655.64
1,2616989,Alimentos,Hidrolandia-GO,linear_regression,8.492536,DIARIO,5.60046,14.0,1689.52,132655.64
2,2616989,Alimentos,Hidrolandia-GO,linear_regression,8.492536,DIARIO,5.458773,5.0,547.76,132655.64
3,2616989,Alimentos,Hidrolandia-GO,linear_regression,23.369698,DIARIO,5.320671,5.0,607.15,132655.64
4,2616989,Alimentos,Hidrolandia-GO,,,DIARIO,11.530809,9.0,1092.89,132655.64


In [169]:
# Agrupando por sku e filial e calculando a média
df_hidrolandia_media = df_hidrolandia.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first', 
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_hidrolandia_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2406968,Hidrolandia-GO,vmd,DIARIO,30.317909,23.32496,13.384615
1,2492303,Hidrolandia-GO,linear_regression,mape_vmd_30,14.252199,12.53185,7.769231
2,2616989,Hidrolandia-GO,linear_regression,DIARIO,6.435977,10.7897,8.0
3,310001-3,Hidrolandia-GO,vmd,DIARIO,9.926348,8.920141,5.307692
4,31014070-2,Hidrolandia-GO,time_series,DIARIO,12.844519,15.14341,8.583333
5,31017079-3,Hidrolandia-GO,linear_regression,DIARIO,13.589607,12.16434,8.692308
6,31022435-3,Hidrolandia-GO,vmd,DIARIO,10.479644,9.5231,7.846154
7,31027526341,Hidrolandia-GO,,DIARIO,27.250999,,49.272727
8,31027526344,Hidrolandia-GO,linear_regression,DIARIO,44.294943,8.882061e-11,53.090909
9,3105886,Hidrolandia-GO,linear_regression,DIARIO,25.647856,32.71784,17.076923


### Ordenar os 10 SKUs com base na maior diferença entre as previsões

In [207]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_hidrolandia_media_sorted = df_hidrolandia_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_hidrolandia_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
8,31027526344,Hidrolandia-GO,linear_regression,DIARIO,44.294943,8.882061e-11,53.090909,44.294943
9,3105886,Hidrolandia-GO,linear_regression,DIARIO,25.647856,32.71784,17.076923,7.069987
0,2406968,Hidrolandia-GO,vmd,DIARIO,30.317909,23.32496,13.384615,6.992947
2,2616989,Hidrolandia-GO,linear_regression,DIARIO,6.435977,10.7897,8.0,4.353727
4,31014070-2,Hidrolandia-GO,time_series,DIARIO,12.844519,15.14341,8.583333,2.298891
1,2492303,Hidrolandia-GO,linear_regression,mape_vmd_30,14.252199,12.53185,7.769231,1.720353
5,31017079-3,Hidrolandia-GO,linear_regression,DIARIO,13.589607,12.16434,8.692308,1.42527
3,310001-3,Hidrolandia-GO,vmd,DIARIO,9.926348,8.920141,5.307692,1.006207
6,31022435-3,Hidrolandia-GO,vmd,DIARIO,10.479644,9.5231,7.846154,0.956544
7,31027526341,Hidrolandia-GO,,DIARIO,27.250999,,49.272727,


### 7.3. Itajai-SC

In [171]:
#Obter os SKUs com maior receita para Itajai-SC
filtro_itajai = preco_historico.loc[preco_historico['nm_filial'] == 'Itajai-SC']
filtro_itajai.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
46810,31014070-1,1593.0,215466.06,135.258041,Itajai-SC
46811,2406968,7594.0,463583.03,61.045961,Itajai-SC
46812,1784069,107.0,20584.63,192.37972,Itajai-SC
46813,1894403,37.0,7276.77,196.669459,Itajai-SC
46814,31027532157,54.0,1741.46,32.249259,Itajai-SC


In [172]:
# Obter os 10 SKUs para Itajai-SC com maior receita
df_top_receita_itajai = filtro_itajai.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita_itajai

Unnamed: 0,sku,nm_filial,receita
49267,31027526344,Itajai-SC,1322395.92
54064,31014070-2,Itajai-SC,1150113.56
52269,31027526341,Itajai-SC,850924.59
52895,2492303,Itajai-SC,813162.41
52266,31022435-3,Itajai-SC,778385.0
48050,31017079-3,Itajai-SC,771742.26
54069,31021721-3,Itajai-SC,543803.6
52902,3110240-3,Itajai-SC,525142.15
51698,2616989,Itajai-SC,504357.36
46811,2406968,Itajai-SC,463583.03


### Como foram as previsões para os 10 SKUs para Itajai-SC com maior receita (180 dias)?¶


In [173]:
df_itajai = pd.merge(df, df_top_receita_itajai,
                                    on=['sku', 'nm_filial'], how='inner')

In [174]:
df_itajai = df_itajai.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_itajai.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,31021721-3,Alimentos,Itajai-SC,linear_regression,32.576798,DIARIO,32.684368,28.0,4256.01,543803.6
1,31021721-3,Alimentos,Itajai-SC,linear_regression,32.576798,DIARIO,31.806374,30.0,4497.97,543803.6
2,31021721-3,Alimentos,Itajai-SC,linear_regression,32.576798,DIARIO,29.807085,22.0,3271.15,543803.6
3,31021721-3,Alimentos,Itajai-SC,linear_regression,90.850932,DIARIO,27.933468,19.0,2879.03,543803.6
4,31021721-3,Alimentos,Itajai-SC,,,DIARIO,70.317284,27.0,4055.75,543803.6


In [176]:
# Agrupando por sku e filial e calculando a média
df_itajai_media = df_itajai.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_itajai_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2406968,Itajai-SC,linear_regression,DIARIO,82.588913,132.4834,40.307692
1,2492303,Itajai-SC,linear_regression,mape_vmd_7,44.659091,59.35054,28.384615
2,2616989,Itajai-SC,linear_regression,DIARIO,23.966791,47.65052,27.615385
3,31014070-2,Itajai-SC,linear_regression,DIARIO,55.528725,77.16669,43.692308
4,31017079-3,Itajai-SC,linear_regression,DIARIO,65.226954,59.59402,31.615385
5,31021721-3,Itajai-SC,linear_regression,DIARIO,35.573349,40.46948,21.153846
6,31022435-3,Itajai-SC,linear_regression,DIARIO,40.994186,56.37328,32.538462
7,31027526341,Itajai-SC,linear_regression,DIARIO,84.983659,1.691518e-10,136.636364
8,31027526344,Itajai-SC,linear_regression,DIARIO,114.033741,3.448864e-10,191.090909
9,3110240-3,Itajai-SC,linear_regression,DIARIO,40.378368,37.918,21.538462


### Ordenar os 10 SKUs com base na maior diferença entre as previsões

In [211]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_itajai_media_sorted = df_itajai_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_itajai_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
8,31027526344,Itajai-SC,linear_regression,DIARIO,114.033741,3.448864e-10,191.090909,114.033741
7,31027526341,Itajai-SC,linear_regression,DIARIO,84.983659,1.691518e-10,136.636364,84.983659
0,2406968,Itajai-SC,linear_regression,DIARIO,82.588913,132.4834,40.307692,49.89453
2,2616989,Itajai-SC,linear_regression,DIARIO,23.966791,47.65052,27.615385,23.683731
3,31014070-2,Itajai-SC,linear_regression,DIARIO,55.528725,77.16669,43.692308,21.637963
6,31022435-3,Itajai-SC,linear_regression,DIARIO,40.994186,56.37328,32.538462,15.379095
1,2492303,Itajai-SC,linear_regression,mape_vmd_7,44.659091,59.35054,28.384615,14.691451
4,31017079-3,Itajai-SC,linear_regression,DIARIO,65.226954,59.59402,31.615385,5.632937
5,31021721-3,Itajai-SC,linear_regression,DIARIO,35.573349,40.46948,21.153846,4.89613
9,3110240-3,Itajai-SC,linear_regression,DIARIO,40.378368,37.918,21.538462,2.460371


### 7.4 Minihub-CTBA

In [213]:
#Obter os SKUs com maior receita para Minihub-CTBA
filtro_Minihub_CTBA = preco_historico.loc[preco_historico['nm_filial'] == 'Minihub-CTBA']
filtro_Minihub_CTBA.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
15344,3109354-1,24.0,3574.14,148.9225,Minihub-CTBA
15345,2505251,14.0,918.78,65.627143,Minihub-CTBA
15346,2561620,2.0,76.73,38.365,Minihub-CTBA
15347,3108262-1,9.0,361.17,40.13,Minihub-CTBA
15348,2482843,3.0,199.19,66.396667,Minihub-CTBA


In [214]:
# Obter os 10 SKUs para Minihub-CTBA
df_top_receita_Minihub_CTBA = filtro_Minihub_CTBA.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita_Minihub_CTBA

Unnamed: 0,sku,nm_filial,receita
15613,31027526344,Minihub-CTBA,31593.78
15435,2406968,Minihub-CTBA,22903.78
16502,2492322,Minihub-CTBA,16140.33
16913,2638253,Minihub-CTBA,15872.65
16117,2406972,Minihub-CTBA,15379.59
16700,31017079-3,Minihub-CTBA,13709.93
16491,2689849,Minihub-CTBA,13587.73
16280,2492400,Minihub-CTBA,12646.99
16208,3109351-2,Minihub-CTBA,12361.96
15423,2603869,Minihub-CTBA,12211.57


### Como foram as previsões para os 10 SKUs para Minihub-CTBA com maior receita (180 dias)?

In [215]:
df_Minihub_CTBA = pd.merge(df, df_top_receita_Minihub_CTBA,
                                    on=['sku', 'nm_filial'], how='inner')

In [216]:
df_Minihub_CTBA = df_Minihub_CTBA.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_Minihub_CTBA.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,2406968,Higiene e Beleza,Minihub-CTBA,,,SEMANAL,2.2849,2.0,118.84,22903.78
1,2406972,Higiene e Beleza,Minihub-CTBA,linear_regression,3.275158e-14,SEMANAL,1.782692,1.0,62.91,15379.59
2,2406972,Higiene e Beleza,Minihub-CTBA,linear_regression,3.275158e-14,SEMANAL,1.782692,1.0,69.91,15379.59
3,2406972,Higiene e Beleza,Minihub-CTBA,,,SEMANAL,1.782692,2.0,122.32,15379.59
4,2406972,Higiene e Beleza,Minihub-CTBA,linear_regression,4.820144e-12,,,1.0,62.91,15379.59


In [217]:
# Agrupando por sku e filial e calculando a média
df_Minihub_CTBA_media = df_Minihub_CTBA.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_Minihub_CTBA_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2406968,Minihub-CTBA,,SEMANAL,2.2849,,2.0
1,2406972,Minihub-CTBA,linear_regression,SEMANAL,1.711167,2.426448e-12,1.666667
2,2492322,Minihub-CTBA,linear_regression,mape_vmd_90,1.992151,0.9883304,3.0
3,2492400,Minihub-CTBA,linear_regression,mape_vmd_60,1.445902,8.538762e-12,1.8
4,2603869,Minihub-CTBA,,mape_vmd_30,0.096774,,1.0
5,2638253,Minihub-CTBA,linear_regression,mape_vmd_30,0.268817,0.4076707,1.25
6,2689849,Minihub-CTBA,,SEMANAL,0.76049,,1.666667
7,31027526344,Minihub-CTBA,linear_regression,SEMANAL,7.173436,4.608092e-12,15.0


### Ordenar os 10 SKUs com base na maior diferença entre as previsões

In [218]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_Minihub_CTBA_media_sorted = df_Minihub_CTBA_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_Minihub_CTBA_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
7,31027526344,Minihub-CTBA,linear_regression,SEMANAL,7.173436,4.608092e-12,15.0,7.173436
1,2406972,Minihub-CTBA,linear_regression,SEMANAL,1.711167,2.426448e-12,1.666667,1.711167
3,2492400,Minihub-CTBA,linear_regression,mape_vmd_60,1.445902,8.538762e-12,1.8,1.445902
2,2492322,Minihub-CTBA,linear_regression,mape_vmd_90,1.992151,0.9883304,3.0,1.00382
5,2638253,Minihub-CTBA,linear_regression,mape_vmd_30,0.268817,0.4076707,1.25,0.138853
0,2406968,Minihub-CTBA,,SEMANAL,2.2849,,2.0,
4,2603869,Minihub-CTBA,,mape_vmd_30,0.096774,,1.0,
6,2689849,Minihub-CTBA,,SEMANAL,0.76049,,1.666667,


### 7.5 Minihub-POA

In [219]:
#Obter os SKUs com maior receita para Minihub-CTBA
filtro_Minihub_POA = preco_historico.loc[preco_historico['nm_filial'] == 'Minihub-POA']
filtro_Minihub_POA.head()

Unnamed: 0,sku,quantidade,receita,preco_medio,nm_filial
16984,3104164,3.0,339.29,113.096667,Minihub-POA
16985,310236571,60.0,173.75,2.895833,Minihub-POA
16986,2425283,2.0,689.52,344.76,Minihub-POA
16987,2261657,58.0,2737.52,47.198621,Minihub-POA
16988,2492303,29.0,4665.97,160.895517,Minihub-POA


In [220]:
# Obter os 10 SKUs para Minihub-POA
df_top_receita_Minihub_POA = filtro_Minihub_POA.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]
df_top_receita_Minihub_POA

Unnamed: 0,sku,nm_filial,receita
17661,2603869,Minihub-POA,27803.81
17830,31027526344,Minihub-POA,27794.47
17403,31010817-2,Minihub-POA,23197.34
18067,2406968,Minihub-POA,19915.14
17979,2492322,Minihub-POA,18854.14
18154,2536357,Minihub-POA,16162.49
17317,31153-3,Minihub-POA,15913.78
17981,2548708,Minihub-POA,13605.66
18403,2406972,Minihub-POA,13476.57
17165,31022435-3,Minihub-POA,12668.89


### Como foram as previsões para os 10 SKUs para Minihub-POA com maior receita (180 dias)?

In [222]:
df_Minihub_POA = pd.merge(df, df_top_receita_Minihub_POA,
                                    on=['sku', 'nm_filial'], how='inner')

In [223]:
df_Minihub_POA = df_Minihub_POA.loc[:, ['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                             'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]
df_Minihub_POA.head()

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
0,2536357,Alimentos,Minihub-POA,linear_regression,1.14822e-12,mape_vmd_180,0.243094,1.0,297.41,16162.49
1,2536357,Alimentos,Minihub-POA,linear_regression,1.14822e-12,mape_vmd_180,0.243094,1.0,297.38,16162.49
2,2536357,Alimentos,Minihub-POA,linear_regression,3.606337e-12,mape_vmd_180,0.243094,1.0,279.91,16162.49
3,2536357,Alimentos,Minihub-POA,linear_regression,1.208644e-12,,,1.0,314.91,16162.49
4,2536357,Alimentos,Minihub-POA,linear_regression,1.046774e-12,mape_vmd_180,0.243094,2.0,649.8,16162.49


In [224]:
# Agrupando por sku e filial e calculando a média
df_Minihub_POA_media = df_Minihub_POA.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()
df_Minihub_POA_media

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real
0,2406968,Minihub-POA,linear_regression,SEMANAL,2.217229,2.495293e-11,4.4
1,2406972,Minihub-POA,,mape_nulo,2.295082,,5.5
2,2492322,Minihub-POA,linear_regression,mape_vmd_90,2.138461,1.300855,3.7
3,2536357,Minihub-POA,linear_regression,mape_vmd_180,0.243094,1.631639e-12,1.2
4,2548708,Minihub-POA,linear_regression,mape_nulo,0.125,3.570755e-13,1.0
5,2603869,Minihub-POA,linear_regression,mape_vmd_30,0.464516,8.317328e-13,1.6
6,31010817-2,Minihub-POA,linear_regression,mape_vmd_60,0.147541,3.773759e-12,1.333333
7,31022435-3,Minihub-POA,linear_regression,mape_vmd_30,0.580645,2.165564e-12,1.25
8,31153-3,Minihub-POA,linear_regression,mape_vmd_30,0.430108,0.1558892,1.166667


### Ordenar os top SKUs com base na maior diferença entre as previsões

In [225]:
# Calcular a diferença absoluta e ordenar da maior diferenca para a menor 
df_Minihub_POA_media_sorted = df_Minihub_POA_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values(by='diff_abs', ascending=False)
df_Minihub_POA_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
0,2406968,Minihub-POA,linear_regression,SEMANAL,2.217229,2.495293e-11,4.4,2.217229
2,2492322,Minihub-POA,linear_regression,mape_vmd_90,2.138461,1.300855,3.7,0.837606
7,31022435-3,Minihub-POA,linear_regression,mape_vmd_30,0.580645,2.165564e-12,1.25,0.580645
5,2603869,Minihub-POA,linear_regression,mape_vmd_30,0.464516,8.317328e-13,1.6,0.464516
8,31153-3,Minihub-POA,linear_regression,mape_vmd_30,0.430108,0.1558892,1.166667,0.274218
3,2536357,Minihub-POA,linear_regression,mape_vmd_180,0.243094,1.631639e-12,1.2,0.243094
6,31010817-2,Minihub-POA,linear_regression,mape_vmd_60,0.147541,3.773759e-12,1.333333,0.147541
4,2548708,Minihub-POA,linear_regression,mape_nulo,0.125,3.570755e-13,1.0,0.125
1,2406972,Minihub-POA,,mape_nulo,2.295082,,5.5,


### 7.6. Minihub-SP

In [227]:
filtro_Minihub_SP = preco_historico[preco_historico['nm_filial'] == 'Minihub-SP']

df_top_receita_Minihub_SP = filtro_Minihub_SP.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]

df_Minihub_SP = pd.merge(df, df_top_receita_Minihub_SP, on=['sku', 'nm_filial'], how='inner') \
                    [['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                      'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]

df_Minihub_SP_media = df_Minihub_SP.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()

df_Minihub_SP_media_sorted = df_Minihub_SP_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values('diff_abs', ascending=False)
df_Minihub_SP_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
1,2492322,Minihub-SP,vmd,mape_vmd_60,2.827869,12.0,8.6,9.172131
8,3102396,Minihub-SP,linear_regression,mape_vmd_60,0.327869,1.516777,2.0,1.188908
5,2638255,Minihub-SP,linear_regression,mape_vmd_180,1.779006,0.705286,1.0,1.073719
7,2689851,Minihub-SP,vmd,SEMANAL,1.914482,0.916542,5.846154,0.99794
0,2406968,Minihub-SP,linear_regression,SEMANAL,6.698139,5.770739,6.8,0.9274
3,2603869,Minihub-SP,linear_regression,mape_vmd_30,0.541935,1.268839,2.0,0.726903
4,2638253,Minihub-SP,linear_regression,mape_vmd_180,0.873717,1.239085,1.25,0.365367
2,2492400,Minihub-SP,vmd,mape_vmd_180,3.287293,3.455494,3.0,0.168201
6,2689849,Minihub-SP,vmd,mape_vmd_15,5.347222,5.442603,8.6,0.095381
9,3109351-2,Minihub-SP,linear_regression,mape_vmd_15,0.5,0.452811,1.8,0.047189


### 7.7 Minihub-BH

In [228]:
filtro_Minihub_BH = preco_historico[preco_historico['nm_filial'] == 'Minihub-BH']

df_top_receita_Minihub_BH = filtro_Minihub_BH.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]

df_Minihub_BH = pd.merge(df, df_top_receita_Minihub_BH, on=['sku', 'nm_filial'], how='inner') \
                    [['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                      'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]

df_Minihub_BH_media = df_Minihub_BH.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()

df_Minihub_BH_media_sorted = df_Minihub_BH_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values('diff_abs', ascending=False)
df_Minihub_BH_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs
0,2406968,Minihub-BH,linear_regression,mape_vmd_180,0.530387,0.280393,1.0,0.249994
7,3104949,Minihub-BH,linear_regression,mape_vmd_30,0.193548,0.016451,1.5,0.177097
6,3104948,Minihub-BH,linear_regression,mape_vmd_60,0.147541,0.095272,1.5,0.052269
5,3104422-2,Minihub-BH,linear_regression,mape_nulo,0.0625,0.081262,1.0,0.018762
3,2638255,Minihub-BH,vmd,mape_vmd_180,0.171271,0.165746,1.0,0.005525
4,2773874,Minihub-BH,vmd,mape_nulo,0.0625,0.0625,1.333333,0.0
1,2536357,Minihub-BH,,mape_nulo,0.331492,,1.0,
2,2536359,Minihub-BH,,mape_nulo,0.229508,,1.0,


### 7.8 Minihub-RJ

In [233]:
filtro_Minihub_RJ = preco_historico[preco_historico['nm_filial'] == 'Minihub-RJ']

df_top_receita_Minihub_RJ = filtro_Minihub_RJ.nlargest(10, 'receita')[['sku', 'nm_filial', 'receita']]

df_Minihub_RJ = pd.merge(df, df_top_receita_Minihub_RJ, on=['sku', 'nm_filial'], how='inner') \
                    [['sku', 'erp_setor', 'nm_filial', 'modelo_py', 'previsao_py', 
                      'modelo_st', 'previsao_st', 'Qtde_Fat_Real', 'Receita_Real', 'receita']]

df_Minihub_RJ_media = df_Minihub_RJ.groupby(['sku', 'nm_filial']).agg({
    'modelo_py': 'first',  
    'modelo_st': 'first',  
    'previsao_st': 'mean',
    'previsao_py': 'mean',
    'Qtde_Fat_Real': 'mean'
}).reset_index()

df_Minihub_RJ_media_sorted = df_Minihub_RJ_media.assign(diff_abs=lambda x: abs(x['previsao_st'] - x['previsao_py'])).sort_values('diff_abs', ascending=False)
df_Minihub_RJ_media_sorted

Unnamed: 0,sku,nm_filial,modelo_py,modelo_st,previsao_st,previsao_py,Qtde_Fat_Real,diff_abs


In [234]:
df_top_receita_Minihub_RJ

Unnamed: 0,sku,nm_filial,receita
26146,2406968,Minihub-RJ,24710.5
25168,3102396,Minihub-RJ,24174.85
26310,31027518113,Minihub-RJ,22238.73
27003,31017079-3,Minihub-RJ,21459.56
24809,2616989,Minihub-RJ,20949.02
26999,2661128,Minihub-RJ,19338.62
25292,2638255,Minihub-RJ,18943.88
26647,31021721-3,Minihub-RJ,18725.7
27142,31014070-1,Minihub-RJ,18579.8
25659,3105886,Minihub-RJ,18138.54


In [235]:
df_Minihub_RJ

Unnamed: 0,sku,erp_setor,nm_filial,modelo_py,previsao_py,modelo_st,previsao_st,Qtde_Fat_Real,Receita_Real,receita
