In [1]:
import pandas as pd
import gzip
import os
import numpy as np

 # A la empresa le interesa el rastreo de lo que marketing considera los mejores productos y lo que ventas considera los mejores clientes.

## Mejores Productos
- product_id = {20001, 20002, 20003, 20004, 20005, 20006, 20007, 20009, 20011, 20032} (diez productos)

## Mejores Clientes
- customer_id = {10001, 10002, 10003, 10004, 10005, 10006, 10007, 10008, 10009, 10011, 10012, 10013} (doce clientes)

Total de <producto, cliente> a predecir = 10 * 12 = 120

# Objetivo
- Es el 01-enero-2020 a las 00:01 y disponibilizamos las ventas del periodo 201912.
- El 02-enero a las 18:00 nos deben entregar:
  - El primer forecast de ventas para cada producto que se harán durante el mes 202002, de forma que nuestras plantas puedan fabricarlos durante el mes de 202001.
  - El segundo forecast es las ventas esperadas en 202002, para los 120 pares de <mejores_clientes, mejores_productos>.

In [2]:
####################################################
############# Set per machine ######################
# The working directory must contain the input datasets. It can be overridden
# without editing the notebook through the LABO3_DATA_DIR environment variable;
# when that variable is unset, the original hard-coded location is used.
#os.chdir("C:/Users/herna/labo3_empresa3_repo/datasets")
os.chdir(os.environ.get("LABO3_DATA_DIR", "C:/diego_tools/labo3/dataset"))
####################################################

In [3]:
# File names used by this notebook, resolved against the current working directory.
# Gzip-compressed, tab-separated sell-out records (input).
arch_sellout = "tb_sellout_02.txt.gz"
# Product master data, read with pd.read_csv (input).
arch_maestro_prod = "maestro_productos_depurado.csv"
# Exogenous variables keyed by periodo_fecha (input).
arch_exogenas = "emp3_exogenas.csv"
# CSV where the list of product ids selected for prediction is written (output).
arch_prod_ids_prediccion = "productos_a_predecir.csv"

In [4]:
# Thresholds that decide which products are discarded from the prediction set.

# Months in which a product must have sales to be considered active
# (NOT discontinued) and therefore kept for prediction.
# NOTE(review): the sell-out data runs through 201912 — confirm these are still
# the intended control months.
meses_para_control_vigencia = [201904,201905,201906]
# Products whose first sale falls in this month (inclusive) or later are
# excluded for having too little sales history.
# NOTE(review): product 20032, listed among the best products in the intro,
# first appears in 201902 and is therefore excluded — confirm this is intended.
tope_fecha_historia = 201902

In [5]:
def diferencia_meses(d1, d2):
    """Return the signed number of calendar months from ``d2`` to ``d1``.

    Only the ``year`` and ``month`` attributes of both arguments are used, so
    the day of the month is ignored. The result is negative when ``d1`` falls
    in an earlier month than ``d2``.
    """
    delta_anios = d1.year - d2.year
    delta_meses = d1.month - d2.month
    return 12 * delta_anios + delta_meses

## Sellout

In [6]:
# Open the gzip-compressed file in text mode and load it into a DataFrame
with gzip.open(arch_sellout, 'rt') as archivo:
    # read_csv parses the whole tab-separated stream in a single call
    df_sellout = pd.read_csv(archivo,sep="\t")

In [7]:
# Inspect columns, dtypes and memory usage of the raw sell-out data
df_sellout.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2945818 entries, 0 to 2945817
Data columns (total 7 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   periodo                int64  
 1   customer_id            int64  
 2   product_id             int64  
 3   plan_precios_cuidados  int64  
 4   cust_request_qty       int64  
 5   cust_request_tn        float64
 6   tn                     float64
dtypes: float64(2), int64(5)
memory usage: 157.3 MB


In [8]:
# Defensive de-duplication: the row count is printed before and after so any
# dropped rows show up in the output.
print(len(df_sellout))
df_sellout = df_sellout.drop_duplicates()
print(len(df_sellout))

2945818
2945818


**--> sin duplicados**

In [9]:
# Count missing values per column
df_sellout.isna().sum()

periodo                  0
customer_id              0
product_id               0
plan_precios_cuidados    0
cust_request_qty         0
cust_request_tn          0
tn                       0
dtype: int64

**--> sin nulos**

In [10]:
# Distinct periods (YYYYMM) present in the sell-out data
df_sellout["periodo"].unique()

array([201701, 201702, 201703, 201704, 201705, 201706, 201707, 201708,
       201709, 201710, 201711, 201712, 201801, 201802, 201803, 201804,
       201805, 201806, 201807, 201808, 201809, 201810, 201811, 201812,
       201901, 201902, 201903, 201904, 201905, 201906, 201907, 201908,
       201909, 201910, 201911, 201912], dtype=int64)

In [11]:
# Number of distinct products (the column has no nulls, so nunique matches len(unique()))
df_sellout["product_id"].nunique()

1233

--> algunos productos no van a tener descripción

In [12]:
# Number of distinct customers (the column has no nulls, so nunique matches len(unique()))
df_sellout["customer_id"].nunique()

597

In [13]:
# Distinct values taken by the plan_precios_cuidados flag
df_sellout["plan_precios_cuidados"].unique()

array([0, 1], dtype=int64)

In [14]:
# Preview the first rows of the raw sell-out data
df_sellout.head()

Unnamed: 0,periodo,customer_id,product_id,plan_precios_cuidados,cust_request_qty,cust_request_tn,tn
0,201701,10234,20524,0,2,0.053,0.053
1,201701,10032,20524,0,1,0.13628,0.13628
2,201701,10217,20524,0,1,0.03028,0.03028
3,201701,10125,20524,0,1,0.02271,0.02271
4,201701,10012,20524,0,11,1.54452,1.54452


In [15]:
# Control figure: total tonnes in the raw data, kept to verify later
# transformations do not lose or duplicate volume. The sum is computed once.
tn_total = sum(df_sellout.tn)
tn_suma_original = round(tn_total)
print("Toneladas Total Control:", round(tn_total, 0))

Toneladas Total Control: 1324989.0


In [16]:
# Aggregate to one row per (period, product): quantities are summed and the
# plan_precios_cuidados flag takes its maximum within the group.
df_sellout_prod = (
    df_sellout
    .groupby(['periodo', 'product_id'])
    .agg(
        tn=('tn', 'sum'),
        cust_request_tn=('cust_request_tn', 'sum'),
        cust_request_qty=('cust_request_qty', 'sum'),
        plan_precios_cuidados=('plan_precios_cuidados', 'max'),
    )
    .reset_index()
)
# Control: the aggregated tonnes must match the original total
tn_suma_prod = round(sum(df_sellout_prod.tn), 0)
print("Toneladas Total Control:", tn_suma_prod, tn_suma_original == tn_suma_prod)

Toneladas Total Control: 1324989.0 True


## Descarte de Productos que no hay que predecir

In [17]:
# All product ids present in the aggregated sell-out data
todos_los_product_ids = set(df_sellout_prod.product_id.unique())

# Active products: those with at least one row in the vigencia-control months
en_meses_control = df_sellout_prod.periodo.isin(meses_para_control_vigencia)
product_ids_vigentes = df_sellout_prod.loc[en_meses_control, "product_id"].unique()
print("Vigentes:", len(product_ids_vigentes))

# Discontinued products: every product that is not active
product_ids_discontinuados = todos_los_product_ids - set(product_ids_vigentes)
print("Discontinuados:", len(product_ids_discontinuados))

# Insufficient history: first period at or after tope_fecha_historia
primer_periodo_por_prod = df_sellout_prod.groupby("product_id")["periodo"].min().reset_index()
product_ids_sin_hist_sufic = primer_periodo_por_prod[primer_periodo_por_prod.periodo >= tope_fecha_historia]
print(product_ids_sin_hist_sufic.head(5))
product_ids_sin_hist_sufic = product_ids_sin_hist_sufic.product_id.unique()
print("Menos 3 meses hist:", len(product_ids_sin_hist_sufic))

# Overlap and union of both discard criteria
ids_sin_hist = set(product_ids_sin_hist_sufic)
ids_descartados = product_ids_discontinuados | ids_sin_hist
print("Interseccion: ", len(product_ids_discontinuados & ids_sin_hist))
print("Union: ", len(ids_descartados))


# Products kept for prediction: everything not discarded by either criterion
product_ids_para_predecir = todos_los_product_ids - ids_descartados
print("\nTotal:", len(product_ids_para_predecir))

Vigentes: 958
Discontinuados: 275
     product_id  periodo
31        20032   201902
126       20127   201909
173       20174   201906
208       20210   201909
211       20213   201908
Menos 3 meses hist: 182
Interseccion:  92
Union:  365

Total: 868


In [18]:
# Persist the selected product ids in their own CSV file
ids_a_guardar = list(product_ids_para_predecir)
df_prods_prediccion = pd.DataFrame({"product_id": ids_a_guardar})
df_prods_prediccion.to_csv(arch_prod_ids_prediccion, index=False)

## Completado de Períodos sin Ventas

* Los productos que no aparecen un mes van a ser completados con un registro en 0
* Sin embargo, se completará a partir de su primer mes (y no para atrás)

In [19]:
# Distinct periods in the aggregated data and how many there are
periodos = df_sellout_prod["periodo"].unique()
cant_periodos = len(periodos)
(periodos, cant_periodos)

(array([201701, 201702, 201703, 201704, 201705, 201706, 201707, 201708,
        201709, 201710, 201711, 201712, 201801, 201802, 201803, 201804,
        201805, 201806, 201807, 201808, 201809, 201810, 201811, 201812,
        201901, 201902, 201903, 201904, 201905, 201906, 201907, 201908,
        201909, 201910, 201911, 201912], dtype=int64),
 36)

In [20]:
# Distinct products in the aggregated data and how many there are
productos = df_sellout_prod["product_id"].unique()
cant_productos = len(productos)
cant_productos

1233

In [21]:
# Compare the actual number of (period, product) rows with the size of the full product x period grid
len(df_sellout_prod),cant_productos*cant_periodos

(31243, 44388)

**--> no todos los productos están en todos los períodos**

In [22]:
# First period with sales for each product
df_primer_mes_prod = (
    df_sellout_prod
    .groupby("product_id")["periodo"]
    .min()
    .reset_index(name="primer_periodo")
)
df_primer_mes_prod.tail()

Unnamed: 0,product_id,primer_periodo
1228,21295,201701
1229,21296,201708
1230,21297,201701
1231,21298,201708
1232,21299,201708


In [23]:
# Build the full product x period grid; periods in which a product had no
# sales will later be filled with 0. "imputado" starts at 0 for every row.
df_productos = pd.DataFrame({"product_id": productos})
df_periodos = pd.DataFrame({"periodo": periodos})
df_cartesiano = df_productos.merge(df_periodos, how='cross').assign(imputado=0)
len(df_cartesiano)

44388

In [24]:
# Inspect the structure of the product x period grid
df_cartesiano.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 44388 entries, 0 to 44387
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   product_id  44388 non-null  int64
 1   periodo     44388 non-null  int64
 2   imputado    44388 non-null  int64
dtypes: int64(3)
memory usage: 1.4 MB


In [25]:
# Preview the first rows of the product x period grid
df_cartesiano.head()

Unnamed: 0,product_id,periodo,imputado
0,20001,201701,0
1,20001,201702,0
2,20001,201703,0
3,20001,201704,0
4,20001,201705,0


In [26]:
# Left-join the sales onto the full grid; pairs without sales get NaN measures
df_sellout_prod_complet = pd.merge(
    df_cartesiano,
    df_sellout_prod,
    on=["product_id", "periodo"],
    how="left",
)
len(df_sellout_prod_complet)

44388

In [27]:
# Preview the completed grid after the left join
df_sellout_prod_complet.head()

Unnamed: 0,product_id,periodo,imputado,tn,cust_request_tn,cust_request_qty,plan_precios_cuidados
0,20001,201701,0,934.77222,937.72717,479.0,0.0
1,20001,201702,0,798.0162,833.72187,432.0,0.0
2,20001,201703,0,1303.35771,1330.74697,509.0,0.0
3,20001,201704,0,1069.9613,1132.9443,279.0,0.0
4,20001,201705,0,1502.20132,1550.68936,701.0,0.0


In [28]:
# Count the missing values introduced by the left join (pairs without sales)
df_sellout_prod_complet.isna().sum()

product_id                   0
periodo                      0
imputado                     0
tn                       13145
cust_request_tn          13145
cust_request_qty         13145
plan_precios_cuidados    13145
dtype: int64

In [29]:
# Imputation: flag and zero-fill the (product, period) pairs that had no sales.
# A row is imputed when the left join found no sell-out record (tn is NaN).
# Rows already flagged stay flagged, so re-running this cell after the NaNs
# were filled does not silently reset "imputado" to 0.
es_imputado = df_sellout_prod_complet.tn.isna() | (df_sellout_prod_complet.imputado == 1)
df_sellout_prod_complet["imputado"] = np.where(es_imputado, 1, 0)
# Measures keep their value on real rows and become 0 on imputed rows
for col in ["tn", "cust_request_tn", "cust_request_qty", "plan_precios_cuidados"]:
    df_sellout_prod_complet[col] = np.where(es_imputado, 0, df_sellout_prod_complet[col])

In [30]:
# Verify that no missing values remain after the imputation
df_sellout_prod_complet.isna().sum()

product_id               0
periodo                  0
imputado                 0
tn                       0
cust_request_tn          0
cust_request_qty         0
plan_precios_cuidados    0
dtype: int64

In [31]:
# Sanity check: the number of imputed rows should equal the rows added by the grid
df_sellout_prod_complet.imputado.sum(),len(df_cartesiano)-len(df_sellout_prod)

(13145, 13145)

In [32]:
# Attach each product's first period and drop the rows that precede it.
# Row counts are printed before the merge, after the merge and after the filter.
print(len(df_sellout_prod_complet))
df_sellout_prod_complet_desde_1er_mes = pd.merge(
    df_sellout_prod_complet,
    df_primer_mes_prod,
    how="inner",
    on="product_id",
)
print(len(df_sellout_prod_complet_desde_1er_mes))

desde_primer_periodo = (
    df_sellout_prod_complet_desde_1er_mes.periodo
    >= df_sellout_prod_complet_desde_1er_mes.primer_periodo
)
df_sellout_prod_complet_desde_1er_mes = df_sellout_prod_complet_desde_1er_mes.loc[desde_primer_periodo]
print(len(df_sellout_prod_complet_desde_1er_mes))
df_sellout_prod_complet_desde_1er_mes.tail(10)

44388
44388
35888


Unnamed: 0,product_id,periodo,imputado,tn,cust_request_tn,cust_request_qty,plan_precios_cuidados,primer_periodo
44206,21242,201911,0,0.27418,0.27418,60.0,0.0,201910
44207,21242,201912,0,0.21128,0.21128,33.0,0.0,201910
44241,21247,201910,0,0.18871,0.18871,38.0,0.0,201910
44242,21247,201911,0,0.22311,0.22311,54.0,0.0,201910
44243,21247,201912,0,0.14544,0.14544,37.0,0.0,201910
44278,20728,201911,0,16.02237,16.02237,73.0,0.0,201911
44279,20728,201912,0,14.31612,14.31612,98.0,0.0,201911
44315,20770,201912,0,3.185,3.185,6.0,0.0,201912
44351,20792,201912,0,3.80934,3.80934,7.0,0.0,201912
44387,20854,201912,0,3.96397,3.96397,7.0,0.0,201912


In [33]:
# Parse the YYYYMM integers into first-of-month timestamps
fecha_periodo = pd.to_datetime(df_sellout_prod_complet_desde_1er_mes['periodo'], format='%Y%m')
fecha_primer_periodo = pd.to_datetime(df_sellout_prod_complet_desde_1er_mes['primer_periodo'], format='%Y%m')

df_sellout_prod_complet_desde_1er_mes['periodo_fecha'] = fecha_periodo

# Calendar month of the period
df_sellout_prod_complet_desde_1er_mes["mes"] = fecha_periodo.dt.month

# Months elapsed since the product's first period. This is the same formula as
# diferencia_meses, vectorized over the whole column instead of a row-wise
# apply (faster, and also well-defined on an empty frame). The cast keeps the
# int64 dtype that the row-wise version produced.
df_sellout_prod_complet_desde_1er_mes["meses_historia"] = (
    (fecha_periodo.dt.year - fecha_primer_periodo.dt.year) * 12
    + (fecha_periodo.dt.month - fecha_primer_periodo.dt.month)
).astype("int64")

# Keep only meses_historia; the first-period helper column is no longer needed
df_sellout_prod_complet_desde_1er_mes = df_sellout_prod_complet_desde_1er_mes.drop(columns=["primer_periodo"])

In [34]:
# Check the new date columns on the last rows
df_sellout_prod_complet_desde_1er_mes.tail(10)

Unnamed: 0,product_id,periodo,imputado,tn,cust_request_tn,cust_request_qty,plan_precios_cuidados,periodo_fecha,mes,meses_historia
44206,21242,201911,0,0.27418,0.27418,60.0,0.0,2019-11-01,11,1
44207,21242,201912,0,0.21128,0.21128,33.0,0.0,2019-12-01,12,2
44241,21247,201910,0,0.18871,0.18871,38.0,0.0,2019-10-01,10,0
44242,21247,201911,0,0.22311,0.22311,54.0,0.0,2019-11-01,11,1
44243,21247,201912,0,0.14544,0.14544,37.0,0.0,2019-12-01,12,2
44278,20728,201911,0,16.02237,16.02237,73.0,0.0,2019-11-01,11,0
44279,20728,201912,0,14.31612,14.31612,98.0,0.0,2019-12-01,12,1
44315,20770,201912,0,3.185,3.185,6.0,0.0,2019-12-01,12,0
44351,20792,201912,0,3.80934,3.80934,7.0,0.0,2019-12-01,12,0
44387,20854,201912,0,3.96397,3.96397,7.0,0.0,2019-12-01,12,0


## Incorporo Maestro y Exógenas

In [35]:
# Load the product master data
df_maestro_prod = pd.read_csv(arch_maestro_prod)

In [36]:
# Inspect columns and dtypes of the product master data
df_maestro_prod.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1251 entries, 0 to 1250
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   cat1               1251 non-null   object
 1   cat2               1251 non-null   object
 2   cat3               1251 non-null   object
 3   brand              1251 non-null   object
 4   sku_size           1251 non-null   int64 
 5   product_id         1251 non-null   int64 
 6   producto_estrella  1251 non-null   int64 
dtypes: int64(3), object(4)
memory usage: 68.5+ KB


In [37]:
# Preview the first rows of the product master data
df_maestro_prod.head()

Unnamed: 0,cat1,cat2,cat3,brand,sku_size,product_id,producto_estrella
0,HC,ROPA LAVADO,Liquido,LIMPIEX,900,20280,0
1,HC,ROPA LAVADO,Liquido,LIMPIEX,450,20180,0
2,HC,ROPA LAVADO,Liquido,LIMPIEX,120,20332,0
3,HC,ROPA LAVADO,Liquido,LIMPIEX,450,20222,0
4,HC,ROPA LAVADO,Liquido,LIMPIEX,900,20288,0


In [38]:
# Products that have sales but no entry in the product master
ids_con_ventas = set(df_sellout_prod_complet_desde_1er_mes.product_id)
ids_en_maestro = set(df_maestro_prod.product_id)
prods_desconocidos = ids_con_ventas - ids_en_maestro
print(len(prods_desconocidos))

45


In [39]:
# Total tonnes sold by the products that are missing from the master
es_prod_desconocido = df_sellout_prod_complet_desde_1er_mes.product_id.isin(prods_desconocidos)
df_tn_prod_desc = df_sellout_prod_complet_desde_1er_mes.loc[es_prod_desconocido]
df_tn_prod_desc.tn.sum()

360.20867999999996

In [40]:
# Attach the master attributes; the row count is printed before and after to
# confirm the left join does not multiply rows.
print(len(df_sellout_prod_complet_desde_1er_mes))
df_sellout_prod_complet_desde_1er_mes = df_sellout_prod_complet_desde_1er_mes.merge(
    df_maestro_prod,
    how='left',
    on='product_id',
)
print(len(df_sellout_prod_complet_desde_1er_mes))
df_sellout_prod_complet_desde_1er_mes.head()

35888
35888


Unnamed: 0,product_id,periodo,imputado,tn,cust_request_tn,cust_request_qty,plan_precios_cuidados,periodo_fecha,mes,meses_historia,cat1,cat2,cat3,brand,sku_size,producto_estrella
0,20001,201701,0,934.77222,937.72717,479.0,0.0,2017-01-01,1,0,HC,ROPA LAVADO,Liquido,ARIEL,3000.0,1.0
1,20001,201702,0,798.0162,833.72187,432.0,0.0,2017-02-01,2,1,HC,ROPA LAVADO,Liquido,ARIEL,3000.0,1.0
2,20001,201703,0,1303.35771,1330.74697,509.0,0.0,2017-03-01,3,2,HC,ROPA LAVADO,Liquido,ARIEL,3000.0,1.0
3,20001,201704,0,1069.9613,1132.9443,279.0,0.0,2017-04-01,4,3,HC,ROPA LAVADO,Liquido,ARIEL,3000.0,1.0
4,20001,201705,0,1502.20132,1550.68936,701.0,0.0,2017-05-01,5,4,HC,ROPA LAVADO,Liquido,ARIEL,3000.0,1.0


In [41]:
# Fill the master attributes of products missing from the master:
# text attributes get "desconocida", numeric attributes get 0.
es_desconocido = df_sellout_prod_complet_desde_1er_mes.product_id.isin(prods_desconocidos)
rellenos = {
    "cat1": "desconocida",
    "cat2": "desconocida",
    "cat3": "desconocida",
    "brand": "desconocida",
    "sku_size": 0,
    "producto_estrella": 0,
}
for columna, relleno in rellenos.items():
    df_sellout_prod_complet_desde_1er_mes[columna] = np.where(
        es_desconocido,
        relleno,
        df_sellout_prod_complet_desde_1er_mes[columna],
    )

In [42]:
# Load the exogenous variables and parse their date key as datetime
df_exogenas = pd.read_csv(arch_exogenas)
df_exogenas["periodo_fecha"] = pd.to_datetime(df_exogenas["periodo_fecha"])

df_exogenas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   periodo_fecha              48 non-null     datetime64[ns]
 1   temp_media                 48 non-null     float64       
 2   temp_max_media             48 non-null     float64       
 3   temp_min_media             48 non-null     float64       
 4   IPC                        48 non-null     float64       
 5   promedio_mens_dolar_venta  48 non-null     float64       
 6   catastrofe                 48 non-null     bool          
dtypes: bool(1), datetime64[ns](1), float64(5)
memory usage: 2.4 KB


In [43]:
# Attach the exogenous variables by period date; the row count is printed
# before and after to confirm the left join does not multiply rows.
print(len(df_sellout_prod_complet_desde_1er_mes))
df_sellout_prod_complet_desde_1er_mes = df_sellout_prod_complet_desde_1er_mes.merge(
    df_exogenas,
    how="left",
    on="periodo_fecha",
)
print(len(df_sellout_prod_complet_desde_1er_mes))

35888
35888


In [44]:
# Control: total tonnes after all merges must still match the original total
tn_suma_final = round(sum(df_sellout_prod_complet_desde_1er_mes.tn), 0)
print("Toneladas Total Control:", tn_suma_final, tn_suma_original == tn_suma_final)

Toneladas Total Control: 1324989.0 True


In [45]:
# Inspect the structure of the enriched dataset
df_sellout_prod_complet_desde_1er_mes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35888 entries, 0 to 35887
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   product_id                 35888 non-null  int64         
 1   periodo                    35888 non-null  int64         
 2   imputado                   35888 non-null  int32         
 3   tn                         35888 non-null  float64       
 4   cust_request_tn            35888 non-null  float64       
 5   cust_request_qty           35888 non-null  float64       
 6   plan_precios_cuidados      35888 non-null  float64       
 7   periodo_fecha              35888 non-null  datetime64[ns]
 8   mes                        35888 non-null  int64         
 9   meses_historia             35888 non-null  int64         
 10  cat1                       35888 non-null  object        
 11  cat2                       35888 non-null  object        
 12  cat3

In [46]:
# Sort by product and period so that later shift-based feature engineering works correctly
df_sellout_prod_complet_desde_1er_mes = df_sellout_prod_complet_desde_1er_mes.sort_values(by=["product_id","periodo"],ascending=True)

In [47]:
# The frame was sorted in the previous cell so that later feature engineering
# using SHIFT works correctly; preview the first rows to check the order.
df_sellout_prod_complet_desde_1er_mes.head(50)

Unnamed: 0,product_id,periodo,imputado,tn,cust_request_tn,cust_request_qty,plan_precios_cuidados,periodo_fecha,mes,meses_historia,...,cat3,brand,sku_size,producto_estrella,temp_media,temp_max_media,temp_min_media,IPC,promedio_mens_dolar_venta,catastrofe
0,20001,201701,0,934.77222,937.72717,479.0,0.0,2017-01-01,1,0,...,Liquido,ARIEL,3000.0,1.0,25.65,30.4,20.9,1.6,15.91,False
1,20001,201702,0,798.0162,833.72187,432.0,0.0,2017-02-01,2,1,...,Liquido,ARIEL,3000.0,1.0,25.35,30.1,20.6,2.1,15.59,False
2,20001,201703,0,1303.35771,1330.74697,509.0,0.0,2017-03-01,3,2,...,Liquido,ARIEL,3000.0,1.0,22.45,26.9,18.0,2.4,15.52,False
3,20001,201704,0,1069.9613,1132.9443,279.0,0.0,2017-04-01,4,3,...,Liquido,ARIEL,3000.0,1.0,18.9,23.5,14.3,2.7,15.36,False
4,20001,201705,0,1502.20132,1550.68936,701.0,0.0,2017-05-01,5,4,...,Liquido,ARIEL,3000.0,1.0,15.9,19.5,12.3,1.4,15.72,False
5,20001,201706,0,1520.06539,1575.82891,570.0,0.0,2017-06-01,6,5,...,Liquido,ARIEL,3000.0,1.0,13.7,17.9,9.5,1.2,16.12,False
6,20001,201707,0,1030.67391,1086.47101,381.0,0.0,2017-07-01,7,6,...,Liquido,ARIEL,3000.0,1.0,13.35,17.0,9.7,1.7,17.19,False
7,20001,201708,0,1267.39462,1289.66869,643.0,0.0,2017-08-01,8,7,...,Liquido,ARIEL,3000.0,1.0,15.15,19.2,11.1,1.4,17.42,False
8,20001,201709,0,1316.94604,1356.96103,381.0,0.0,2017-09-01,9,8,...,Liquido,ARIEL,3000.0,1.0,15.95,20.3,11.6,1.9,18.15,False
9,20001,201710,0,1439.75563,1441.60247,273.0,0.0,2017-10-01,10,9,...,Liquido,ARIEL,3000.0,1.0,17.15,23.0,11.3,1.5,17.46,False


In [48]:
# Final check: count missing values per column before exporting
df_sellout_prod_complet_desde_1er_mes.isna().sum()

product_id                   0
periodo                      0
imputado                     0
tn                           0
cust_request_tn              0
cust_request_qty             0
plan_precios_cuidados        0
periodo_fecha                0
mes                          0
meses_historia               0
cat1                         0
cat2                         0
cat3                         0
brand                        0
sku_size                     0
producto_estrella            0
temp_media                   0
temp_max_media               0
temp_min_media               0
IPC                          0
promedio_mens_dolar_venta    0
catastrofe                   0
dtype: int64

In [49]:
# Export the period x product base dataset to a CSV file (without the index)
df_sellout_prod_complet_desde_1er_mes.to_csv("emp3_sellout_base_period_product.csv", index=False)