# Setup

In [18]:
%run notebook_setup.py

Now you can import modules from the project root: /bi/workspace/Projects/Forecast/forecast


In [19]:
import pandas as pd

In [20]:
import gc

In [21]:
from src.config.dir_config import OUTPUT_PATH_DEMMAND_SUMMARY,OUTPUT_PATH_PROCESSED_WEEKLY_SALES

# Read data

In [None]:
# Load data_genex from its parquet file.
# NOTE(review): the path is absolute and hard-coded, unlike weekly_sales_df which
# is read from a path constant in src.config.dir_config — consider doing the same here.
# data_genex is used later when building data_genex_sample, so this cell must be
# executed before that one.
data_genex = pd.read_parquet('/bi/workspace/Projects/Forecast/forecast/data/processed/data_genex.parquet')

In [23]:
# Load the weekly sales table from the parquet path imported from src.config.dir_config.
weekly_sales_df = pd.read_parquet(OUTPUT_PATH_PROCESSED_WEEKLY_SALES)

# Functions

In [24]:
# Drops excluded stores/departments and prunes the department categories safely.
def filter_dataframe(df, excludes_stores, excludes_deptos, exclude_ecom_deptos=True):
    """Return a filtered copy of ``df`` without the excluded stores and departments.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``cod_sucursal`` and ``nombre_depto``.
    excludes_stores : iterable or None
        Values of ``cod_sucursal`` to drop. ``None`` means "exclude nothing".
    excludes_deptos : iterable or None
        Values of ``nombre_depto`` to drop. ``None`` means "exclude nothing".
        The caller's object is never modified.
    exclude_ecom_deptos : bool, default True
        When true, every department whose name contains ``'e-com'``
        (case-insensitive) is dropped as well.

    Returns
    -------
    pandas.DataFrame
        A new frame (the original row index is preserved). If ``nombre_depto``
        is categorical, categories that no longer occur are removed from it;
        ``cod_sucursal`` categories are left untouched.
    """
    # Work on fresh lists so tuples/sets/None are accepted and the caller's
    # arguments are never mutated or concatenated with incompatible types.
    stores_to_drop = [] if excludes_stores is None else list(excludes_stores)
    deptos_to_drop = [] if excludes_deptos is None else list(excludes_deptos)

    if exclude_ecom_deptos:
        # na=False: rows with a missing department name are simply "not e-com";
        # without it the mask may contain NaN and boolean indexing raises.
        is_ecom = df['nombre_depto'].str.contains('e-com', case=False, na=False)
        deptos_to_drop += df.loc[is_ecom, 'nombre_depto'].unique().tolist()

    keep = ~df['cod_sucursal'].isin(stores_to_drop) & ~df['nombre_depto'].isin(deptos_to_drop)

    # Explicit copy so the column assignment below targets an independent frame
    # instead of a filtered view of the caller's data (no SettingWithCopy issues).
    filtered = df.loc[keep].copy()

    # Only 'nombre_depto' is pruned; unused categories are removed when it is categorical.
    for col in ['nombre_depto']:
        if isinstance(filtered[col].dtype, pd.CategoricalDtype):
            filtered[col] = filtered[col].cat.remove_unused_categories()

    return filtered

# Filters

In [25]:
# Exclusion lists shaped for filter_dataframe: department names are matched against
# 'nombre_depto' and store codes against 'cod_sucursal'.
# NOTE(review): the only visible call to filter_dataframe is commented out in the next cell.
excludes_deptos = ['Miscelaneos', 'Bolsas y bolsos']
excludes_stores = [707, 767]

In [26]:
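# Filter weekly_sales_df and save it back to OUTPUT_PATH_PROCESSED_WEEKLY_SALES (currently disabled; enabling it overwrites the parquet file that weekly_sales_df was loaded from)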
#weekly_sales_df = filter_dataframe(weekly_sales_df, excludes_stores, excludes_deptos)
#weekly_sales_df.to_parquet(OUTPUT_PATH_PROCESSED_WEEKLY_SALES, index=False)

# Week by Week

In [28]:
# Sum of weekly_sales per (cod_sucursal, nombre_sucursal, nombre_temporada, ano_temporada),
# returned as a flat frame with the group keys as regular columns.
total_sales_by_season_store = (
    weekly_sales_df
    .groupby(
        by=['cod_sucursal', 'nombre_sucursal', 'nombre_temporada', 'ano_temporada'],
        observed=True,  # for categorical keys, keep only combinations present in the data
    )
    .agg(total_sales=pd.NamedAgg(column='weekly_sales', aggfunc='sum'))
    .reset_index()
)

In [29]:
# Display the columns available in weekly_sales_df.
weekly_sales_df.columns

Index(['cod_sucursal', 'cod_producto', 'cod_talla', 'cod_sku',
       'cod_ano_comercial', 'cod_semana', 'weekly_sales', 'stock_start_week',
       'stock_end_week', 'mnt_venta_neta', 'mnt_costo_venta', 'reposition',
       'weekly_available_stock', 'flag_sale', 'flag_inventory_available',
       'flag_repo', 'flag_stockout', 'nombre_sucursal', 'tipo_sucursal',
       'nombre_temporada', 'ano_temporada', 'nombre_depto', 'nombre_linea',
       'nom_talla'],
      dtype='object')

In [30]:
# Per season, store and commercial week: sums of the inventory, sale and stock-out flags.
# NOTE(review): observed=False differs from the observed=True used for
# total_sales_by_season_store. If any group key is categorical, this emits a row for
# every combination of categories, including ones absent from the data — confirm
# that this is intended.
weekly_store_sales = (
    weekly_sales_df
    .groupby(
        by=['nombre_temporada', 'ano_temporada', 'cod_sucursal', 'cod_ano_comercial', 'cod_semana'],
        observed=False,
    )
    .agg(
        sku_with_inventory=pd.NamedAgg(column='flag_inventory_available', aggfunc='sum'),
        sku_with_sales=pd.NamedAgg(column='flag_sale', aggfunc='sum'),
        sku_with_stockout=pd.NamedAgg(column='flag_stockout', aggfunc='sum'),
    )
    .reset_index()
)

# Sample

In [31]:
# Product codes and store codes used to cut the sample dataframes below.
# NOTE(review): 273139 appears twice in productos; this is harmless for .isin(),
# but it may be a typo for a different product code — confirm.
productos = [273139, 273139,631139]
stores = [32]

## Sample dataframes

In [33]:
# Rows of weekly_sales_df for the selected products and stores, re-indexed from 0,
# with the mnt_venta_neta and mnt_costo_venta columns dropped.
weekly_sales_df_sample = (
    weekly_sales_df
    .loc[lambda d: d['cod_producto'].isin(productos) & d['cod_sucursal'].isin(stores)]
    .reset_index(drop=True)
    .copy()
    .drop(columns=['mnt_venta_neta', 'mnt_costo_venta'])
)

# Same product/store selection applied to data_genex (all columns kept).
# Requires data_genex to have been loaded in the "Read data" section.
data_genex_sample = (
    data_genex
    .loc[lambda d: d['cod_producto'].isin(productos) & d['cod_sucursal'].isin(stores)]
    .reset_index(drop=True)
    .copy()
)

NameError: name 'data_genex' is not defined

# Excel export

In [34]:
# Write each exported frame to its own sheet of a single workbook (row index omitted).
with pd.ExcelWriter('../sandbox/weekly_sale_sample.xlsx') as writer:
    sheets = {
        'weekly_sales_df_sample': weekly_sales_df_sample,
        # 'data_genex_sample': data_genex_sample,
        'total_sales_by_season_store': total_sales_by_season_store,
    }
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)