In [2]:
import sys
import os
import numpy as np
import pandas as pd
from typing import  Optional, Callable, Dict
from collections.abc import Mapping
import re, unicodedata


sys.path.append(os.path.abspath('..'))
from core.s3 import S3AssetManager

from core.viz import (
        plot_gauge_grid,
        plot_bar,
        plot_heatmap,
        plot_pie,
        plot_time_heatmap
)

In [3]:
notebook_name = "exp_aliforte_prod_reprocesos"
s3 = S3AssetManager(notebook_name=notebook_name)

In [58]:
df_prod = s3.read_csv(f"s3://{s3.bucket_name}/data/aliforte_production/ops_pel.csv")
df_prod.head()

Unnamed: 0,date,start_time,end_time,accumulated_time,downtime,feed_type,lot,quantity_tm,temperature_c,operator,...,accumulated_td,hour,downtime_str,downtime_td,downtime_hour,start_hour,ts_round,normalize_notes,details_notes,specie
0,2025-04-08,12:20:00,15:00:00,03:45:00,00:00:00,HY-8,31067.0,,80.2,Jose Benalcazar,...,0 days 03:45:00,3.75,00:00:00,0 days 00:00:00,0.0,2025-04-08 12:20:00,2025-04-08 12:00:00,,Productivo,reproductoras
1,2025-04-08,15:00:00,16:05:00,,00:00:00,HY-8,31067.0,10.0,80.3,Johnny Tirira,...,,,00:00:00,0 days 00:00:00,0.0,2025-04-08 15:00:00,2025-04-08 15:00:00,,Productivo,reproductoras
2,2025-04-08,16:05:00,17:12:00,01:07:00,00:00:00,HY-1,31065.0,2.0,76.2,Johnny Tirira,...,0 days 01:07:00,1.116667,00:00:00,0 days 00:00:00,0.0,2025-04-08 16:05:00,2025-04-08 16:00:00,,Productivo,reproductoras
3,2025-04-08,17:12:00,18:55:00,01:43:00,00:00:00,E-1,31073.0,3.0,73.2,Johnny Tirira,...,0 days 01:43:00,1.716667,00:00:00,0 days 00:00:00,0.0,2025-04-08 17:12:00,2025-04-08 17:00:00,,Productivo,broiler
4,2025-05-08,10:00:00,15:00:00,11:00:00,00:00:00,E-1,31076.0,,82.1,Jose Benalcazar,...,0 days 11:00:00,11.0,00:00:00,0 days 00:00:00,0.0,2025-05-08 10:00:00,2025-05-08 10:00:00,,Productivo,broiler


In [59]:

reprocess = pd.read_excel("../raw/Consolidado reprocesos.xlsx", sheet_name="Consolidado")
reprocess.columns = reprocess.columns.str.strip().str.lower()
reprocess.rename(columns={"lote": "lot", "cantidad kilos": "repro_kg", "causa reproceso": "repro_cause"}, inplace=True)

In [60]:
reprocess["lot"].duplicated().sum()

6

In [61]:
reprocess[reprocess.duplicated(subset=["lot"], keep=False)]

Unnamed: 0,unnamed: 0,mes,código,lot,repro_kg,repro_cause,responsable
15,,MARZO,C1,30055.0,4920,DIFERENTE COLOR,Nutrición
16,,MARZO,C1,30055.0,5000,DIFERENTE COLOR,Nutrición
23,,MARZO,APE2,30166.0,3560,MOJADO,Producción
24,,MARZO,APE2,30166.0,780,MOJADO,Producción
49,,JUNIO,BAF,30448.0,160,MOJADO,Ventas
50,,JUNIO,BAF,30448.0,840,MOJADO,Ventas
59,,JUNIO,Mezcla de PT,,3080,RESIDUO,Producción
60,,JUNIO,Mezcla de MP,,2640,RESIDUO,Producción
70,,AGOSTO,PF01,31118.0,695,GRUMOS,Producción
73,,AGOSTO,PF01,31118.0,1875,DOSIFICACION,Jonathan Gomez


In [62]:
reprocess = reprocess.drop_duplicates(subset=["lot"], keep="last")

In [63]:
rev = pd.merge(
    df_prod, 
    reprocess,
    on="lot",
    how="left"
)

rev["repross"] = np.where(rev["repro_kg"]>0, "S", "N")

In [64]:
rev["repross"].value_counts(dropna=False)

N    947
S     21
Name: repross, dtype: int64

In [81]:


# 1. Usamos to_datetime con errors='coerce'
# 'coerce' es clave: si hay una celda con basura, la convierte en NaT (nulo) en vez de romper el código
rev['start_dt'] = pd.to_datetime(rev['start_time'].astype(str), errors='coerce')
rev['end_dt'] = pd.to_datetime(rev['end_time'].astype(str), errors='coerce')

# 2. Calculamos la diferencia
rev['diff'] = rev['end_dt'] - rev['start_dt']

# 3. Convertimos a horas
rev['horas_diferencia'] = rev['diff'].dt.total_seconds() / 3600

# 4. Corrección para turnos nocturnos (ej: empieza 23:00, termina 02:00)
# Si la diferencia es negativa, significa que cruzó la medianoche, sumamos 24h
rev.loc[rev['horas_diferencia'] < 0, 'horas_diferencia'] += 24
rev["performance"] = rev["quantity_tm"] / rev["horas_diferencia"]


In [82]:
rev.columns

Index(['date', 'start_time', 'end_time', 'accumulated_time', 'downtime',
       'feed_type', 'lot', 'quantity_tm', 'temperature_c', 'operator', 'notes',
       'pellet', 'month', 'accumulated_td', 'hour', 'downtime_str',
       'downtime_td', 'downtime_hour', 'start_hour', 'ts_round',
       'normalize_notes', 'details_notes', 'specie', 'unnamed: 0', 'mes',
       'código', 'repro_kg', 'repro_cause', 'responsable', 'repross',
       'start_dt', 'end_dt', 'start_delta', 'diff', 'horas_diferencia',
       'performance'],
      dtype='object')

In [83]:
rev[rev['repross']=='S'][['lot', 'pellet', 'date', 'feed_type', 'specie', "performance",
 'start_time', 'end_time', 'hour', 'horas_diferencia', 'normalize_notes', 'repro_kg', 'quantity_tm',  'repro_cause']]

Unnamed: 0,lot,pellet,date,feed_type,specie,performance,start_time,end_time,hour,horas_diferencia,normalize_notes,repro_kg,quantity_tm,repro_cause
12,31129.0,pellet 1,2025-12-08,HY-7,reproductoras,4.0,13:00:00,14:00:00,1.0,1.0,,4040.0,4.0,PROGRAMACIÓN
98,31422.0,pellet 1,2025-09-20,APP,ponedora,,07:40:00,13:00:00,5.333333,5.333333,,3600.0,,QUEMADO
99,31422.0,pellet 1,2025-09-22,APP,ponedora,,08:30:00,14:30:00,6.0,6.0,Atasque,3600.0,,QUEMADO
107,31422.0,pellet 1,2025-09-25,APP,ponedora,,08:10:00,11:25:00,3.25,3.25,,3600.0,,QUEMADO
254,31131.0,pellet 2,2025-08-08,PF02,cerdos,1.793103,10:10:00,17:25:00,7.25,7.25,,12575.0,13.0,DOSIFICACION
255,31118.0,pellet 2,2025-08-08,PF01,cerdos,0.923077,18:10:00,21:25:00,3.25,3.25,,1875.0,3.0,DOSIFICACION
276,31178.0,pellet 2,2025-08-16,BAI,ponedora,,09:35:00,12:00:00,10.75,2.416667,,2400.0,,GRUMOS
277,31178.0,pellet 2,2025-08-16,BAI,ponedora,,12:00:00,18:00:00,,6.0,,2400.0,,GRUMOS
278,31178.0,pellet 2,2025-08-18,BAI,ponedora,6.153846,06:00:00,09:15:00,,3.25,Falla Caldero,2400.0,20.0,GRUMOS
371,31387.0,pellet 2,2025-09-15,CF,cerdos,,13:50:00,16:30:00,2.666667,2.666667,,14360.0,,CADUCADO


In [84]:
notes_join = lambda s: ", ".join(
    s.dropna()
     .astype(str)
     .str.strip()
     .loc[lambda x: x.ne("")]
     .unique()
)

In [91]:
qa_proccess =rev[rev['repross'] == 'S'].groupby(["pellet", "responsable", "operator"], dropna=False).agg(
    repro_kg=("repro_kg", "sum"),
    count=("lot", "count"),
    quantity_tm=("quantity_tm", "sum"),
    causa=('repro_cause', notes_join),
).reset_index()
qa_proccess["repro_tm"] = qa_proccess["repro_kg"] / 1_000
qa_proccess["razon_qa"] = qa_proccess["repro_tm"] / qa_proccess["quantity_tm"]*100
qa_proccess

Unnamed: 0,pellet,responsable,operator,repro_kg,count,quantity_tm,causa,repro_tm,razon_qa
0,pellet 1,Jose Benalcazar,Jose Benalcazar,10800.0,3,0.0,QUEMADO,10.8,inf
1,pellet 1,Ventas,Johnny Tirira,4040.0,1,4.0,PROGRAMACIÓN,4.04,101.0
2,pellet 2,Jonathan Gomez,Brayan Erazo,14450.0,2,16.0,DOSIFICACION,14.45,90.3125
3,pellet 2,Jonathan Gomez,Johnny Tirira,10280.0,1,10.0,DIFERENTE COLOR,10.28,102.8
4,pellet 2,José Cabezas y Bryan Erazo,Brayan Erazo,4800.0,2,20.0,GRUMOS,4.8,24.0
5,pellet 2,José Cabezas y Bryan Erazo,Jose Cabezas,2400.0,1,0.0,GRUMOS,2.4,inf
6,pellet 2,Ventas,Brayan Erazo,28720.0,2,30.0,CADUCADO,28.72,95.733333
7,pellet 3,Bryan Erazo,Brayan Erazo,620.0,1,5.0,MEZCLADO,0.62,12.4
8,pellet 3,Jordan Gallegos,Brayan Erazo,8660.0,1,33.0,DOSIFICACION,8.66,26.242424
9,pellet 3,Jordan Gallegos,Jose Cabezas,8660.0,1,0.0,DOSIFICACION,8.66,inf


In [93]:
qa_proccess =rev[rev['repross'] == 'S'].groupby(["operator"], dropna=False).agg(
    repro_kg=("repro_kg", "sum"),
    count=("lot", "count"),
    quantity_tm=("quantity_tm", "sum"),
    causa=('repro_cause', notes_join),
).reset_index()
qa_proccess["repro_tm"] = qa_proccess["repro_kg"] / 1_000
qa_proccess["razon_qa"] = qa_proccess["repro_tm"] / qa_proccess["quantity_tm"]*100
qa_proccess

Unnamed: 0,operator,repro_kg,count,quantity_tm,causa,repro_tm,razon_qa
0,Brayan Erazo,73610.0,10,122.0,"DOSIFICACION, GRUMOS, CADUCADO, PROGRAMACIÓN, ...",73.61,60.336066
1,Johnny Tirira,14320.0,2,14.0,"PROGRAMACIÓN, DIFERENTE COLOR",14.32,102.285714
2,Jose Benalcazar,10800.0,3,0.0,QUEMADO,10.8,inf
3,Jose Cabezas,13460.0,6,36.0,"GRUMOS, QUEMADO, MOJADO, DOSIFICACION",13.46,37.388889
