In [45]:
import pandas as pd
import numpy as np
import re
import sys
import os
sys.path.append(os.path.abspath(".."))
from core.viz import plot_line, create_subplot_grid, plot_bar, plot_statistical_strip, plot_pie, plot_scatter, plot_distribution, plot_dynamic_trends
from core.s3 import S3AssetManager

In [46]:
notebook_name = "exp_nutrinor_qa_data"
s3 = S3AssetManager(notebook_name=notebook_name)
BULTOS_TO_TON = 0.04

In [47]:
hist_pel1 = pd.read_excel("../raw/Rendimiento pelletizadoras 2024.xlsx",
 sheet_name="rendimiento pellet 1 (420)",
 skiprows=1)
hist_pel1["pellet"] = "pellet 420"

hist_pel2 = pd.read_excel("../raw/Rendimiento pelletizadoras 2024.xlsx",
 sheet_name="rendimiento pellet 2 (520)",
 skiprows=1)
hist_pel2["pellet"] = "pellet 520"

hist_pel3 = pd.read_excel("../raw/Rendimiento pelletizadoras 2024.xlsx",
 sheet_name="rendimiento pellet 3 (350)",
 skiprows=1)
hist_pel3["pellet"] = "pellet 350"

hist_pel = pd.concat([hist_pel1, hist_pel2, hist_pel3])

rename_cols = {
    "año": "year",
    "mes": "month",
    "semana": "week",
    "Fecha": "date",
    "Producto": "product",
    "Pelletizador": "pelletizer",
    "Hora inicial ": "start_time",
    "Hora final ": "end_time",
    "tiempo trabajado": "work_time",
    "tiempo trabajado (minutos)": "work_time_minutes",
    "tiempo trabajado (horas)": "work_time_hours",
    "bultos": "bags",
    "durabilidad (%)": "durability_pct",
    "carga (Hz)": "load_hz",
    "toneladas": "tons",
    "ton/hora": "tons_per_hour",
    "lote": "lot",
    "Dado": "die",
    "ROLLER": "roller",
    "Ton/Dia": "tons_per_day",
    "Horas/dia": "hours_per_day",
    "Ton/x Hora": "tons_per_hour_alt",
    "Ton/semanal": "tons_weekly",
    "Hora/semanal": "hours_weekly",
    "prom/sem": "avg_weekly",
    "Ton/mensual": "tons_monthly",
    "Hora/mensuales": "hours_monthly",
    "prom/mensual": "avg_monthly",
    "OBSERVACIONES": "notes",
}
hist_pel = hist_pel.rename(columns=rename_cols)

hist_pel["date"] = pd.to_datetime(hist_pel["date"], format="%Y-%m-%d")
hist_pel = hist_pel[hist_pel["date"].notna()]

s = hist_pel["lot"].astype(str).str.strip()
mask_digits = s.str.fullmatch(r"\d+")

hist_pel_digits = hist_pel.loc[mask_digits].copy()      
hist_pel_nondigits = hist_pel.loc[~mask_digits].copy()
#hist_pel_digits['tons'] = hist_pel_digits['bags'] * BULTOS_TO_TON


Data Validation extension is not supported and will be removed


Data Validation extension is not supported and will be removed


Data Validation extension is not supported and will be removed



In [48]:



# 1) Define el "diccionario" de nombres canónicos
ALIASES = {
    "wilson": "Wilson",
    "wIlson".lower(): "Wilson",
    "johan": "Johan",
    "yohan": "Johan",   # <-- si quieres unificar Yohan como Johan
    "luis": "Luis",
    "felipe": "Felipe",
    "daniel": "Daniel",
    "jorge": "Jorge",
    "sigifredo": "Sigifredo",
    "pelletizadores": None,  # o "Equipo" si prefieres
}

# 2) Orden fijo para combinaciones (para que quede consistente)
ORDER = ["Daniel", "Felipe", "Johan", "Jorge", "Luis", "Sigifredo", "Wilson"]
ORDER_IDX = {name: i for i, name in enumerate(ORDER)}

def normalize_pelletizador(x: object) -> object:
    if x is None or (isinstance(x, float) and np.isnan(x)):
        return np.nan

    s = str(x).strip()
    if s == "":
        return np.nan

    # separa por "-" y limpia espacios extra
    parts = [p.strip() for p in s.split("-") if p.strip()]

    canon = []
    for p in parts:
        key = p.strip().lower()
        key = re.sub(r"\s+", " ", key)  # espacios múltiples
        mapped = ALIASES.get(key, None)
        if mapped is None:
            # Si no está en alias, intenta Title Case como fallback
            mapped = p.strip().title()
        canon.append(mapped)

    # elimina Nones (ej. Pelletizadores->None) y duplicados manteniendo únicos
    canon = [c for c in canon if c]
    canon = list(dict.fromkeys(canon))

    if not canon:
        return np.nan

    # ordena para unificar combinaciones (Luis-Wilson = Wilson-Luis)
    canon.sort(key=lambda n: ORDER_IDX.get(n, 999))

    return "-".join(canon)




In [49]:
cls_num = [
    'work_time',
    'tons',
    'durability_pct',
    'work_time_hours',
]
for cl in cls_num:
    hist_pel_digits[cl] = pd.to_numeric(hist_pel_digits[cl], errors='coerce')

# Ejemplo de uso con una columna:
hist_pel_digits["pelletizer"] = hist_pel_digits["pelletizer"].apply(normalize_pelletizador)

In [50]:
notes_join = lambda s: ", ".join(
    s.dropna()
     .astype(str)
     .str.strip()
     .loc[lambda x: x.ne("")]
     .unique()
)

In [51]:
rend_pel = hist_pel_digits.groupby(['lot']).agg(
                date=('date', 'first'),
                batch=('lot', 'count'),
                pellet=("pellet", notes_join),
                product=("product", notes_join),
                start_time=('start_time', 'max'),
                end_time=('end_time', 'last'),
                work_time=('work_time', 'sum'),
                work_time_hours=('work_time_hours', 'sum'),
                tons=('tons', 'sum'),
                durability_pct=('durability_pct', 'mean'),
                die=('die', 'first'),
                roller=('roller', 'first'),
                #notes=("notes", notes_join),
                pelletizer=('pelletizer', 'first'),
).reset_index()
rend_pel["pelletizer"].value_counts(dropna=False)

pelletizer
Johan-Luis             1147
Luis                   1079
Johan-Wilson            704
Wilson                  547
Felipe                  475
Jorge-Wilson            468
Felipe-Wilson           347
None                    323
Felipe-Sigifredo        303
Johan                   220
Felipe-Luis             182
Felipe-Jorge            180
Luis-Wilson             170
Felipe-Johan             78
Daniel-Wilson            74
Sigifredo-Wilson         56
Luis-Sigifredo           49
Jorge-Luis               34
Johan-Jorge              10
Jorge                    10
Sigifredo                 7
Johan-Sigifredo           6
Jorge-Sigifredo           5
Johan-Luis-Wilson         4
Pelletizadores            4
Felipe-Luis-Wilson        1
Felipe-Johan-Wilson       1
Name: count, dtype: int64

In [52]:


def recategorizar_producto(nombre):
    # Validación inicial: si es nulo o no es string
    if pd.isna(nombre):
        return "DESCONOCIDO"
    
    # Convertimos a string y mayúsculas para asegurar coincidencias
    nombre = str(nombre).upper()
    
    # 1. Detectar EXPERIMENTAL
    # CORRECCIÓN: Se agregó 'x' antes del 'in'
    if any(x in nombre for x in ['DIETA', 'TRATAMIENTO', 'ENSAYO', 'T1', 'T2']):
        return 'EXPERIMENTAL'
        
    # 2. Detectar AVES
    if any(x in nombre for x in ['POLLO', 'POLLITO', 'PONEDORA']):
        return 'AVICULTURA'
        
    # 3. Detectar MASCOTAS / EQUINOS
    if 'PERRO' in nombre:
        return 'MASCOTAS'
    if 'EQUINOS' in nombre:
        return 'EQUINOS'
        
    # 4. Detectar BOVINOS (Ganadería)
    claves_bovinos = ['LECHE', 'GANADERIA', 'TERNERA', 'ALTIPLANO', 'PREPARTO', 'PROBIOLECHE', 'VACUNACION']
    if any(x in nombre for x in claves_bovinos):
        return 'BOVINOS'
        
    # 5. Detectar PORCINOS
    claves_porcinos = ['CERDO', 'MARRANA', 'LECHON', 'SUPERCERDO', 'MAGRO', 'PREINICI', 'INICIAD', 'GESTACION', 'LACTANCIA']
    if any(x in nombre for x in claves_porcinos):
        return 'PORCINOS'
    
    # 6. Reglas de desempate (Si dice etapa pero no animal, asumimos Porcinos por tu contexto de Maquila)
    claves_etapas = ['LEVANTE', 'ENGORDE', 'FINALIZA', 'CRECIMIENTO', 'DESARROLLO', 'RECIBO']
    if any(x in nombre for x in claves_etapas):
        return 'PORCINOS'
        
    return 'OTROS'


In [53]:
path = "../raw/SACK OFF HISTORICO.xlsx"

cols_out = ["date", "lot", "product", "theoretical_bag", "actual_bag", "difference", "sackoff", "pellet"]

def build_block(df: pd.DataFrame, cols_in: list[str], pellet_label: str) -> pd.DataFrame:
    return (
        df.loc[:, cols_in]
          .copy()
          .rename(columns={
              cols_in[0]: "date",
              cols_in[1]: "lot",
              cols_in[2]: "product",
              cols_in[3]: "theoretical_bag",
              cols_in[4]: "actual_bag",
              cols_in[5]: "difference",
              cols_in[6]: "sackoff",
          })
          .assign(pellet=pellet_label)
          .loc[:, cols_out]
    )

# Especificación de los bloques por hoja (igual a tu caso AGOSTO 25)
BLOCKS = [
    (["Fecha",   "Lote",   "Producto 420",     "Cant O.P",   "Cant.Prod",   "Diferencia",   "Sackoff"],   "pellet 420"),
    (["Fecha.1", "Lote.1", "Producto 520",     "Cant O.P.1", "Cant.Prod.1", "Diferencia.1", "Sackoff.1"], "pellet 520"),
    (["Fecha.2", "Lote.2", "Producto 350",     "Cant O.P.2", "Cant.Prod.2", "Diferencia.2", "Sackoff.2"], "pellet 350"),
    (["Fecha.3", "Lote.3", "producto harina",  "Cant O.P.3", "Cant.Prod.3", "Diferencia.3", "Sackoff.3"], "harina"),
    (["Fecha.4", "Lote.4", "producto granel",  "Cant O.P.4", "Cant.Prod.4", "Diferencia.4", "Sackoff.4"], "granel"),
]

MONTH_PAT = r"(ENERO|FEBRERO|MARZO|ABRIL|MAYO|JUNIO|JULIO|AGOSTO|SEPTIEMBRE|OCTUBRE|NOVIEMBRE|DICIEMBRE|NOV|DIC)"
re_month_sheet = re.compile(rf"^{MONTH_PAT}\s+\d{{2}}$", re.IGNORECASE)

xlf = pd.ExcelFile(path)
month_sheets = [s for s in xlf.sheet_names if re_month_sheet.match(s.strip())]

all_dfs = []
for sh in month_sheets:
    df = pd.read_excel(xlf, sheet_name=sh, skiprows=0)

    blocks = []
    for cols_in, label in BLOCKS:
        # si alguna columna no existe en esa hoja, se salta ese bloque
        if all(c in df.columns for c in cols_in):
            blocks.append(build_block(df, cols_in, label))

    if blocks:
        out = pd.concat(blocks, ignore_index=True).assign(sheet_name=sh)
        all_dfs.append(out)

hist_sackoff_by_month = pd.concat(all_dfs, ignore_index=True)
hist_sackoff_by_month["date"] = pd.to_datetime(hist_sackoff_by_month["date"], errors="coerce")
hist_sackoff_by_month = hist_sackoff_by_month[hist_sackoff_by_month["date"].notna()]

hist_sackoff_by_month["theoretical_tons"] = hist_sackoff_by_month["theoretical_bag"] * 0.04
hist_sackoff_by_month["actual_tons"] = hist_sackoff_by_month["actual_bag"] * 0.04

df = hist_sackoff_by_month.copy()

df["lot"] = (
    df["lot"]
    .astype(str)
    .str.strip()
    .str.replace(r"\.0$", "", regex=True)
    .replace({"nan": pd.NA, "None": pd.NA, "": pd.NA})
)
mask_digits = df["lot"].str.fullmatch(r"\d+").fillna(False)
hist_sackoff_month_digits = df.loc[mask_digits].copy()
hist_sackoff_month_nondigits = df.loc[~mask_digits].copy()
hist_sackoff_month_digits["month"] = pd.to_datetime(hist_sackoff_month_digits["date"]).dt.strftime('%Y-%m')


# Suponiendo que tu lista está en un DataFrame llamado df
hist_sackoff_month_digits['cat_product'] = df['product'].apply(recategorizar_producto)

In [54]:
hist_sackoff_month_digits['cat_product'].value_counts(dropna=False)

cat_product
PORCINOS        4259
BOVINOS         1709
OTROS           1026
DESCONOCIDO      376
AVICULTURA       100
EXPERIMENTAL      57
EQUINOS           44
MASCOTAS          14
Name: count, dtype: int64

In [55]:
hist_sackoff_tot = pd.read_excel("../raw/SACK OFF HISTORICO.xlsx",
 sheet_name="TOTAL",
 skiprows=0)

rename_cols = {
    "AÑO": "year",
    "MES": "month",
    "FECHA": "date",
    "LOTE": "lot",
    "PRODUCTO": "product",
    "TEORICO": "theoretical_bag",
    "REAL": "actual_bag",
    "DIFERENCIA": "difference_bag",
    "SACKOFF": "sackoff",
    "ENSAQUE": "pellet",
}

hist_sackoff_tot.columns = hist_sackoff_tot.columns.map(rename_cols)
hist_sackoff_tot["date"] = pd.to_datetime(hist_sackoff_tot["date"], format="%Y-%m-%d")
hist_sackoff_tot = hist_sackoff_tot[hist_sackoff_tot["date"].notna()]


df = hist_sackoff_tot.copy()

df["lot"] = (
    df["lot"].astype(str).str.strip()
      .str.replace(r"\.0$", "", regex=True)
      .replace({"nan": pd.NA, "None": pd.NA, "": pd.NA})
)

mask_digits = df["lot"].str.fullmatch(r"\d+").fillna(False)

hist_sackoff_digits = df.loc[mask_digits].copy()
hist_sackoff_nondigits = df.loc[~mask_digits].copy()

# convertir lot a entero nullable (ya que son solo dígitos)
# (opcional) si date está como string, normaliza
hist_sackoff_digits["date"] = pd.to_datetime(hist_sackoff_digits["date"], errors="coerce")

# calcular tons (asumiendo 1 bulto = 40kg => 0.04 ton)
hist_sackoff_digits["theoretical_tons"] = hist_sackoff_digits["theoretical_bag"] * 0.04
hist_sackoff_digits["actual_tons"] = hist_sackoff_digits["actual_bag"] * 0.04
hist_sackoff_digits["difference_tons"] = hist_sackoff_digits["difference_bag"] * 0.04  
hist_sackoff_digits["month"] = pd.to_datetime(hist_sackoff_digits["date"]).dt.strftime('%Y-%m')

hist_sackoff_digits["pellet"] = hist_sackoff_digits["pellet"].astype(str).str.upper().str.strip()

hist_sackoff_digits["pellet"] = hist_sackoff_digits["pellet"].replace({
        '420': 'pellet 420',
        '520': 'pellet 520',
        '350': 'pellet 350',
        'HNA': 'harina',
        '420G': 'granel',
        '520G': 'granel',
        '350G': 'granel',
        '420 G': 'granel',
        '520 G': 'granel',
        '402 G': 'granel',
})

hist_sackoff_digits["pellet"].unique()


array(['pellet 420', 'pellet 520', 'pellet 350', 'harina', 'granel'],
      dtype=object)

In [56]:
grp_month_hist_sackoff = hist_sackoff_month_digits.groupby(["month", "pellet"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_month_hist_sackoff["diff"] = grp_month_hist_sackoff["actual_tons"] - grp_month_hist_sackoff["theoretical_tons"]
grp_month_hist_sackoff["sackoff"] = (grp_month_hist_sackoff["diff"]/grp_month_hist_sackoff["theoretical_tons"])*100
f_line = plot_line(
    df=grp_month_hist_sackoff[grp_month_hist_sackoff["month"]>='2025-01-01'],
    x_col="month",
    y_col="sackoff",
    group_col="pellet",
    width=900,
    height=500,
)
f_line.show()
s3.save_plotly_html(f_line, "line_hist_month_sackoff_pel_month.html")

In [57]:
grp_month_hist_sackoff = hist_sackoff_month_digits.groupby(["month"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_month_hist_sackoff["diff"] = grp_month_hist_sackoff["actual_tons"] - grp_month_hist_sackoff["theoretical_tons"]
grp_month_hist_sackoff["sackoff"] = (grp_month_hist_sackoff["diff"]/grp_month_hist_sackoff["theoretical_tons"])*100
f_line_month = plot_line(
    df=grp_month_hist_sackoff[grp_month_hist_sackoff["month"]>='2025-01-01'],
    x_col="month",
    y_col="sackoff",
    text_format=".2f",
    width=900,
    height=500,
    
)
f_line_month.show()
s3.save_plotly_html(f_line_month, "line_hist_month_sackoff_month.html")

In [58]:
hist_sackoff_month_digits["pellet"].unique()

array(['pellet 420', 'pellet 520', 'pellet 350', 'harina', 'granel'],
      dtype=object)

In [59]:
#sackoff por dia y pellet


grp_date_pellet = hist_sackoff_month_digits.groupby(["date","pellet"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_date_pellet["diff"] = grp_date_pellet["actual_tons"] - grp_date_pellet["theoretical_tons"]
grp_date_pellet["sackoff"] = (grp_date_pellet["diff"]/grp_date_pellet["theoretical_tons"])*100
grp_date_pellet


Unnamed: 0,date,pellet,theoretical_tons,actual_tons,diff,sackoff
0,2024-05-01,pellet 420,112.24,110.84,-1.40,-1.247327
1,2024-05-01,pellet 520,138.20,135.68,-2.52,-1.823444
2,2024-05-02,pellet 420,96.00,95.16,-0.84,-0.875000
3,2024-05-02,pellet 520,62.00,61.40,-0.60,-0.967742
4,2024-05-03,pellet 420,40.24,40.68,0.44,1.093439
...,...,...,...,...,...,...
1667,2025-12-16,granel,150.24,149.71,-0.53,-0.352769
1668,2025-12-16,harina,16.00,16.12,0.12,0.750000
1669,2025-12-16,pellet 350,55.48,54.32,-1.16,-2.090844
1670,2025-12-16,pellet 420,44.00,44.00,0.00,0.000000


In [60]:
for pel in grp_date_pellet.pellet.unique():
    f = plot_dynamic_trends(
        df=grp_date_pellet[grp_date_pellet.pellet==pel], 
        date_col="date", 
        value_col="sackoff", 
        category_col="pellet", 
            windows=[7, 15, 30],
        width=1200, 
        height=400  
    )
    f.show()
    name = f"sackoff_{pel.replace(' ', '_')}.html"
    print(name)
    s3.save_plotly_html(f, name)

sackoff_pellet_420.html


sackoff_pellet_520.html


sackoff_harina.html


sackoff_granel.html


sackoff_pellet_350.html


# Particularidades del Sackofff

In [61]:
add_hist_sackoff_month_digits = pd.merge(
    hist_sackoff_month_digits,
    rend_pel.loc[:,['lot','start_time', 'end_time',
       'work_time', 'work_time_hours', 'tons', 'durability_pct', 'die',
       'roller', 'pelletizer']],
on="lot",
how="left"
)

s = add_hist_sackoff_month_digits["die"].fillna("").astype(str)

add_hist_sackoff_month_digits["die"] = np.where(
    s=="",
    "sin información",
    "dado " + s
)


grp_date_pellet = add_hist_sackoff_month_digits.groupby(["date","pellet","die"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_date_pellet["diff"] = grp_date_pellet["actual_tons"] - grp_date_pellet["theoretical_tons"]
grp_date_pellet["sackoff"] = (grp_date_pellet["diff"]/grp_date_pellet["theoretical_tons"])*100
add_hist_sackoff_month_digits.columns

Index(['date', 'lot', 'product', 'theoretical_bag', 'actual_bag', 'difference',
       'sackoff', 'pellet', 'sheet_name', 'theoretical_tons', 'actual_tons',
       'month', 'cat_product', 'start_time', 'end_time', 'work_time',
       'work_time_hours', 'tons', 'durability_pct', 'die', 'roller',
       'pelletizer'],
      dtype='object')

In [62]:
DEFAULT_COLORS = ["#1C8074", "#666666", "#E4572E", "#29B6F6", "#FFA726"]
plot_statistical_strip

<function core.viz.plot_statistical_strip(df: pandas.core.frame.DataFrame, x_col: str, y_col: str, category_order: Optional[List[str]] = None, color_map: Optional[Dict[str, str]] = None, show_boxplot: bool = True, show_mean_ci: bool = True, show_global_mean: bool = True, show_counts: bool = True, title: str = '', x_title: Optional[str] = None, y_title: Optional[str] = None, point_opacity: float = 0.6, point_size: int = 7, box_opacity: float = 0.25, height: int = 600, width: int = 1000, filename: Optional[str] = None) -> plotly.graph_objs._figure.Figure>

In [63]:
f = plot_statistical_strip(
        df=grp_date_pellet,
        x_col="die",
        y_col="sackoff",
        )
f.show()

In [64]:
for pl in  grp_date_pellet["pellet"].unique():
    print(pl)
    f = plot_statistical_strip(
        df=grp_date_pellet[grp_date_pellet["pellet"] == pl],
        x_col="die",
        y_col="sackoff",
        title=f"<b>Sackoff por die en  {pl}</b>",
        )
    f.show()
    name = f"dado_sackoff_{pl.replace(' ', '_')}.html"
    print(name)
    s3.save_plotly_html(f, name)

#tienen a pesar mas hacial el sackofff

pellet 420


dado_sackoff_pellet_420.html
pellet 520


dado_sackoff_pellet_520.html
harina


dado_sackoff_harina.html
granel


dado_sackoff_granel.html
pellet 350


dado_sackoff_pellet_350.html


In [65]:
#que pasa con las harinas

cond_harina = add_hist_sackoff_month_digits["pellet"] == "harina"
harinas = add_hist_sackoff_month_digits[cond_harina]

cond_date = harinas["date"].between("2024-01-21", "2025-04-26")
harinas["etapa"] = np.where(cond_date, "malo", "bueno")


harinas_grouped =  harinas.groupby(["etapa", "pelletizer"], dropna=False).agg(
    count=('lot', 'nunique'),
    theoretical_tons=("theoretical_tons", "mean"),
    actual_tons=("actual_tons", "mean"),
    sackoff=('sackoff', 'mean'),
    sackoff_median=('sackoff', 'median'),
).reset_index()
harinas_grouped["diff"] = harinas_grouped["actual_tons"] - harinas_grouped["theoretical_tons"]
harinas_grouped["sackoff"] = (harinas_grouped["diff"]/harinas_grouped["theoretical_tons"])*100
harinas_grouped.sort_values("sackoff", inplace=True)
harinas_grouped



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,etapa,pelletizer,count,theoretical_tons,actual_tons,sackoff,sackoff_median,diff
19,malo,Sigifredo,1,9.0,8.67,-3.666667,-0.036667,-0.33
14,malo,Johan,19,18.2159,17.7813,-2.385828,-0.019153,-0.4346
20,malo,Sigifredo-Wilson,8,15.635556,15.266667,-2.359295,-0.016691,-0.368889
21,malo,Wilson,28,17.8263,17.425667,-2.247428,-0.016111,-0.400633
10,malo,Felipe,33,17.012471,16.688235,-1.905868,-0.017333,-0.324235
22,malo,,539,19.24049,18.916721,-1.68275,-0.0168,-0.323769
11,malo,Felipe-Johan,1,17.0,16.73,-1.588235,-0.015882,-0.27
15,malo,Johan-Luis,53,16.897385,16.656481,-1.425686,-0.015149,-0.240904
13,malo,Felipe-Wilson,25,17.286432,17.04152,-1.416787,-0.018529,-0.244912
17,malo,Luis,35,22.924639,22.610967,-1.368276,-0.015883,-0.313672


In [66]:
cond_granel = add_hist_sackoff_month_digits["pellet"] == "granel"
cond_date = add_hist_sackoff_month_digits["date"].between("2025-09-21", "2025-09-30")

granel_good = add_hist_sackoff_month_digits[cond_granel & cond_date]
granel_good.groupby("pelletizer", dropna=False).agg(
    count=('lot', 'nunique'),
    theoretical_tons=("theoretical_tons", "mean"),
    actual_tons=("actual_tons", "mean"),
    sackoff=('sackoff', 'mean'),
    sackoff_median=('sackoff', 'median'),
)

Unnamed: 0_level_0,count,theoretical_tons,actual_tons,sackoff,sackoff_median
pelletizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Felipe,9,16.145444,15.955644,-0.011976,-0.01
Luis,15,14.013333,13.969867,-0.002268,-0.002667
Wilson,14,18.940914,18.644,-0.013052,-0.007513
,12,16.0,15.939167,-0.001455,-0.001188


In [67]:
cond_harina = add_hist_sackoff_month_digits["pellet"] == "harina"
cond_date = add_hist_sackoff_month_digits["date"].between("2024-01-21", "2025-04-26")

harina_bad = add_hist_sackoff_month_digits[cond_harina & cond_date]
harina_bad.groupby("pelletizer", dropna=False).agg(
    count=('lot', 'nunique'),
    theoretical_tons=("theoretical_tons", "mean"),
    actual_tons=("actual_tons", "mean"),
    sackoff=('sackoff', 'mean'),
    sackoff_median=('sackoff', 'median'),
)


Unnamed: 0_level_0,count,theoretical_tons,actual_tons,sackoff,sackoff_median
pelletizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Felipe,33,17.012471,16.688235,-0.017663,-0.017333
Felipe-Johan,1,17.0,16.73,-0.015882,-0.015882
Felipe-Sigifredo,32,14.828176,14.701818,-0.006966,-0.010909
Felipe-Wilson,25,17.286432,17.04152,-0.012564,-0.018529
Johan,19,18.2159,17.7813,-0.021845,-0.019153
Johan-Luis,53,16.897385,16.656481,-0.013483,-0.015149
Johan-Wilson,3,19.888333,19.69,-0.008021,-0.014379
Luis,35,22.924639,22.610967,-0.016253,-0.015883
Luis-Wilson,2,20.3,20.1,-0.005863,-0.005863
Sigifredo,1,9.0,8.67,-0.036667,-0.036667


In [68]:
cond_harina = add_hist_sackoff_month_digits["pellet"] == "harina"
cond_date = add_hist_sackoff_month_digits["date"].between("2025-04-26", "2025-12-26")

harina_good = add_hist_sackoff_month_digits[cond_harina & cond_date]
harina_good.groupby("pelletizer", dropna=False).agg(
    count=('lot', 'nunique'),
    theoretical_tons=("theoretical_tons", "mean"),
    actual_tons=("actual_tons", "mean"),
    sackoff=('sackoff', 'mean'),
    sackoff_median=('sackoff', 'median'),
)

Unnamed: 0_level_0,count,theoretical_tons,actual_tons,sackoff,sackoff_median
pelletizer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Felipe,6,7.666667,7.753333,0.015159,0.007143
Felipe-Luis,1,14.0,14.12,0.008571,0.008571
Felipe-Wilson,1,14.0,14.08,0.005714,0.005714
Johan,1,14.04,14.16,0.008547,0.008547
Johan-Luis,3,12.666667,12.786667,0.010095,0.011429
Jorge-Wilson,1,4.0,4.2,0.05,0.05
Luis,4,8.01,8.17,0.032212,0.023
Luis-Sigifredo,1,8.0,8.04,0.005,0.005
Wilson,2,14.0,14.08,0.005714,0.005714
,416,11.061053,11.134989,0.013784,0.005698


In [141]:
grp_tot_hist_sackoff = hist_sackoff_digits.groupby(["month", "pellet"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_tot_hist_sackoff["diff"] = grp_tot_hist_sackoff["actual_tons"] - grp_tot_hist_sackoff["theoretical_tons"]
grp_tot_hist_sackoff["sackoff"] = (grp_tot_hist_sackoff["diff"]/grp_tot_hist_sackoff["theoretical_tons"])*100


f_line_total = plot_line(
    df=grp_tot_hist_sackoff[grp_tot_hist_sackoff["month"] >="2025-01-01"],
    x_col="month",
    y_col="sackoff",
    group_col="pellet",
    width=1100,
    height=500,
)
f_line_total.show()
s3.save_plotly_html(f_line_total, "line_hist_tot_sackoff_pel_month.html")


In [142]:
grp_tot_hist_sackoff = hist_sackoff_digits.groupby(["month"]).agg(
    theoretical_tons=("theoretical_tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

grp_tot_hist_sackoff["diff"] = grp_tot_hist_sackoff["actual_tons"] - grp_tot_hist_sackoff["theoretical_tons"]
grp_tot_hist_sackoff["sackoff"] = (grp_tot_hist_sackoff["diff"]/grp_tot_hist_sackoff["theoretical_tons"])*100

f_line_total_month = plot_line(
    df=grp_tot_hist_sackoff[grp_tot_hist_sackoff["month"]>='2025-01-01'],
    x_col="month",
    y_col="sackoff",
    text_format=".2f",
    width=1100,
    height=500,
)
f_line_total_month.show()
s3.save_plotly_html(f_line_total_month, "line_hist_tot_sackoff_month.html")


In [143]:
f3 = create_subplot_grid(
    figures=[f_line_total,f_line],
    rows=1,
    cols=2,
    titles=["<b>Sackoff hoja consolidado</b>","<b>Sackoff hojas por mes</b>"],
    shared_x=True,
    shared_y=True,
    main_title="",
    height=400,
    width=1100,
)
f3.show()
s3.save_plotly_html(f3, "sackoff_hojas.html")

In [72]:
f3 = create_subplot_grid(
    figures=[f_line_total_month,f_line_month],
    rows=1,
    cols=2,
    titles=["<b>Resumen Sackoff hoja completa por mes</b>","<b>Resumen Sackoff hojas por mes</b>"],
    shared_x=True,
    shared_y=True,
    main_title="",
    height=400,
    width=1100,
)
f3.show()
s3.save_plotly_html(f3, "sackoff_hojas_resumen.html")

## CONSOLIDADOS POR LOTES

In [73]:
sackoff_month =  hist_sackoff_month_digits.groupby(["lot"]).agg(
    date=('date', 'first'),
    date_count=('date', 'count'),
    batch=('lot', 'count'),
    pellet=("pellet", notes_join),
    product=("product", notes_join),
    sheet_name=('sheet_name', notes_join),
    tons=('theoretical_tons', 'sum'),
    actual_tons=('actual_tons', 'sum'), 
).reset_index()
sackoff_month
sackoff_month["month"] = pd.to_datetime(sackoff_month["date"]).dt.strftime('%Y-%m')


In [74]:
consolidate_sackoff = sackoff_month.groupby(["month"]).agg(
    theoretical_tons=("tons", "sum"),
    actual_tons=("actual_tons", "sum"),
).reset_index()

consolidate_sackoff["diff"] = consolidate_sackoff["actual_tons"] - consolidate_sackoff["theoretical_tons"]
consolidate_sackoff["sackoff"] = (consolidate_sackoff["diff"]/consolidate_sackoff["theoretical_tons"])*100


In [75]:
rend_pel["date"].min(), rend_pel["date"].max()

(Timestamp('1900-02-03 00:00:00'), Timestamp('2025-12-30 00:00:00'))

In [76]:
sackoff_month["date"].min(), sackoff_month["date"].max()

(Timestamp('2024-05-01 00:00:00'), Timestamp('2025-12-16 00:00:00'))

In [77]:
rend_pel[rend_pel["date"].between("2025-02-01", '2025-12-01')].shape

(3140, 14)

In [78]:
sackoff_month[sackoff_month["date"].between("2025-02-01", '2025-12-01')].shape

(4138, 10)

In [79]:
rend_pel[rend_pel["date"].between("2025-02-01", '2025-12-01')].shape

(3140, 14)

In [80]:
rev_by_month = pd.merge(
    rend_pel[rend_pel["date"].between("2025-02-01", '2025-12-01')],
    sackoff_month[sackoff_month["date"].between("2025-02-01", '2025-12-01')],
    on=["lot"],
    how="outer",
    indicator=True,
    suffixes=("_hoja_rendimiento", "_hoja_sackoff_mes"),
)

In [81]:
ord_col =[
'lot', 'date_hoja_rendimiento','date_hoja_sackoff_mes', 
'batch_hoja_rendimiento', 'batch_hoja_sackoff_mes',
'pellet_hoja_rendimiento', 'pellet_hoja_sackoff_mes',
'product_hoja_rendimiento','product_hoja_sackoff_mes',
'tons_hoja_rendimiento', 'tons_hoja_sackoff_mes', 'actual_tons',
'sheet_name', '_merge'
]
rev_by_month = rev_by_month[ord_col]

In [82]:
rev_by_month["_merge"].value_counts()

_merge
both          2421
right_only    1717
left_only      719
Name: count, dtype: int64

In [83]:

counts = rev_by_month["_merge"].value_counts()

# DataFrame resumen
df_merge_summary = (
    counts.rename_axis("merge_status")
          .reset_index(name="n_lotes")
)

# Etiquetas claras según tu contexto
label_map = {
    "left_only":  "Solo en Rendimiento (histórico de producción)",
    "right_only": "Solo en Sackoff (histórico de sackoff)",
    "both":       "En ambas fuentes",
}

df_merge_summary["descripcion"] = df_merge_summary["merge_status"].map(label_map)

# (opcional) % sobre el total
total = df_merge_summary["n_lotes"].sum()
df_merge_summary["pct"] = (df_merge_summary["n_lotes"] / total * 100).round(2)

# Orden recomendado
order = ["both", "left_only", "right_only"]
df_merge_summary["merge_status"] = pd.Categorical(df_merge_summary["merge_status"], categories=order, ordered=True)
df_merge_summary = df_merge_summary.sort_values("merge_status").reset_index(drop=True)

df_merge_summary


Unnamed: 0,merge_status,n_lotes,descripcion,pct
0,both,2421,En ambas fuentes,49.85
1,left_only,719,Solo en Rendimiento (histórico de producción),14.8
2,right_only,1717,Solo en Sackoff (histórico de sackoff),35.35


In [108]:
f_pie = plot_pie(
    df_merge_summary,
    label_col="descripcion",
    value_col="n_lotes",
    hover_col="n_lotes",
    title="Distribución de lotes por fuentes",
    width=400,
    height=500,
)
f_pie.show()
s3.save_plotly_html(f_pie, "merge_status_pie.html")

In [109]:
# Nutrinor QA Data Exploration
# Existen 2646 lotes en el archivo de rendimientos que también se encuentran en el archivo de Sackoff.
# Existen 2646 lotes que se encuentran en el archivo de Sackoff (hojas mes a mes) pero no en el de rendimientos.
# Existen 3838 lotes que se encuentran en el archivo de rendimientos pero no en el de Sackoff.

In [110]:
rev_by_month

Unnamed: 0,lot,date_hoja_rendimiento,date_hoja_sackoff_mes,batch_hoja_rendimiento,batch_hoja_sackoff_mes,pellet_hoja_rendimiento,pellet_hoja_sackoff_mes,product_hoja_rendimiento,product_hoja_sackoff_mes,tons_hoja_rendimiento,tons_hoja_sackoff_mes,actual_tons,sheet_name,_merge
0,42158,2025-11-21,NaT,2.0,,pellet 520,,RECIBO SUPERCERDO GRANEL,,20.0,,,,left_only
1,54153,2025-02-01,NaT,1.0,,pellet 520,,MARRANAS GESTACION EB,,14.0,,,,left_only
2,55109,2025-02-03,NaT,1.0,,pellet 520,,NUTRINOR FINALIZADOR,,40.0,,,,left_only
3,55123,2025-02-01,NaT,1.0,,pellet 520,,LECHONES,,16.0,,,,left_only
4,55146,2025-02-03,NaT,1.0,,pellet 520,,LECHONES 454,,40.0,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4852,961160,NaT,2025-03-01,,1.0,,harina,,NUTRINOR ALTIPLANO EX ALTA ENER HARINA,,7.00,6.62,MARZO 25,right_only
4853,961161,NaT,2025-03-02,,1.0,,harina,,GANADERIA LEVANTE PLUS,,15.00,14.70,MARZO 25,right_only
4854,96146,NaT,2025-03-04,,1.0,,pellet 520,,MAQUILA NUTRINOR LECHONES MEDICADO,,32.00,31.84,MARZO 25,right_only
4855,96171,NaT,2025-03-02,,1.0,,pellet 420,,NUTRINOR ALTIPLANO EX ALTA ENER PELLET,,32.12,31.52,MARZO 25,right_only


In [111]:
rev_by_month_both = rev_by_month[rev_by_month["_merge"] == 'both'][ord_col]
rev_by_month_both["month"] = pd.to_datetime(rev_by_month_both["date_hoja_rendimiento"]).dt.strftime('%Y-%m')
rev_by_month_both["diff_kilos"] = (rev_by_month_both["tons_hoja_rendimiento"] - rev_by_month_both["tons_hoja_sackoff_mes"])*1000


rev_by_month_diff_tons = rev_by_month_both[rev_by_month_both["tons_hoja_rendimiento"] != rev_by_month_both["tons_hoja_sackoff_mes"]]

datos_bad_tons =rev_by_month_diff_tons[['lot', 'tons_hoja_rendimiento', 'tons_hoja_sackoff_mes',
       'sheet_name',  'pellet_hoja_rendimiento', 'pellet_hoja_sackoff_mes', 'diff_kilos']].round(2)

s3.save_dataframe(datos_bad_tons, "datos_bad_tons.csv")
rev_by_month_both

Unnamed: 0,lot,date_hoja_rendimiento,date_hoja_sackoff_mes,batch_hoja_rendimiento,batch_hoja_sackoff_mes,pellet_hoja_rendimiento,pellet_hoja_sackoff_mes,product_hoja_rendimiento,product_hoja_sackoff_mes,tons_hoja_rendimiento,tons_hoja_sackoff_mes,actual_tons,sheet_name,_merge,month,diff_kilos
362,1011100,2025-03-04,2025-03-04,1.0,1.0,pellet 520,pellet 520,PRELEVANTE BR,MAQUILA BR PRELEVANTE EB,8.0,8.04,8.72,MARZO 25,both,2025-03,-40.0
363,1011103,2025-03-03,2025-03-04,1.0,1.0,pellet 520,pellet 520,TERNERAS ELITE,NUTRINOR LECHE PELLET ELITE,16.0,16.00,16.08,MARZO 25,both,2025-03,0.0
364,1011108,2025-03-03,2025-03-04,2.0,1.0,pellet 520,harina,DESARROLLO SUPERCERDO GRANEL,GANADERIA LEVANTE PLUS,25.0,25.10,24.71,MARZO 25,both,2025-03,-100.0
365,1011109,2025-03-04,2025-03-04,1.0,1.0,pellet 520,harina,MAGRO SUPERCERDO GRANEL,NUTRINOR ALTIPLANO EX ALTA ENER HARINA,35.0,35.00,34.25,MARZO 25,both,2025-03,0.0
366,1011112,2025-03-04,2025-03-04,1.0,1.0,pellet 420,pellet 420,ALTA ENERGIA CP,NUTRINOR ALTA ENERGIA CP PELLET,32.0,32.40,31.96,MARZO 25,both,2025-03,-400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4480,4861131,2025-11-29,2025-12-01,1.0,2.0,pellet 520,"granel, pellet 520",NUTRIMAGRO GRANEL,"MAQUILA NUTRI-MAGRO GRANEL, MAQUILA NUTRI-MAGRO",12.0,36.00,35.14,"DICIEMBRE 25, DIC 24",both,2025-11,-24000.0
4483,4861164,2025-11-29,2025-12-01,1.0,1.0,pellet 420,granel,SUPERLECHE GRANEL,NUTRINOR SUPER LECHE PELLET GRANEL,10.0,10.04,9.76,DICIEMBRE 25,both,2025-11,-40.0
4484,4861166,2025-12-01,2025-12-01,1.0,2.0,pellet 420,pellet 420,LECHE TOP,NUTRINOR LECHE TOP,16.0,32.12,32.00,"DICIEMBRE 25, NOV 24",both,2025-12,-16120.0
4486,486153,2025-11-30,2025-12-01,1.0,1.0,pellet 520,granel,MARRANAS GESTACION EB GRANEL,MAQUILA GESTACION NORMAL SUPERCERDO GRANEL,8.0,8.00,7.63,DICIEMBRE 25,both,2025-11,0.0


In [112]:
rev_by_month_both[rev_by_month_both["tons_hoja_rendimiento"] == rev_by_month_both["tons_hoja_sackoff_mes"]].shape

(1276, 16)

In [113]:
1276/2421

0.527054935976869

In [114]:
rev_by_month_both["pellet_hoja_rendimiento"].value_counts()

pellet_hoja_rendimiento
pellet 520                1513
pellet 420                 902
pellet 420, pellet 520       6
Name: count, dtype: int64

In [115]:
rev_by_month_both["pellet_hoja_sackoff_mes"].value_counts()
pel = ["granel", "harina", "pellet 420", "pellet 520", "pellet 350"]

In [116]:
rev_by_month_both["diff_kilos"].describe()

count      2421.000000
mean      -5935.900950
std       22651.060525
min     -880000.000000
25%        -560.000000
50%           0.000000
75%           0.000000
max       34280.000000
Name: diff_kilos, dtype: float64

In [117]:
datos_bad_tons

Unnamed: 0,lot,tons_hoja_rendimiento,tons_hoja_sackoff_mes,sheet_name,pellet_hoja_rendimiento,pellet_hoja_sackoff_mes,diff_kilos
362,1011100,8.0,8.04,MARZO 25,pellet 520,pellet 520,-40.0
364,1011108,25.0,25.10,MARZO 25,pellet 520,harina,-100.0
366,1011112,32.0,32.40,MARZO 25,pellet 420,pellet 420,-400.0
372,1011166,16.0,16.04,MARZO 25,pellet 420,pellet 420,-40.0
374,101180,32.0,32.24,MARZO 25,pellet 420,pellet 420,-240.0
...,...,...,...,...,...,...,...
4477,4861106,10.0,12.00,DICIEMBRE 25,pellet 520,granel,-2000.0
4478,4861113,42.0,43.00,"DICIEMBRE 25, NOVIEMBRE 25",pellet 420,"pellet 420, granel",-1000.0
4480,4861131,12.0,36.00,"DICIEMBRE 25, DIC 24",pellet 520,"granel, pellet 520",-24000.0
4483,4861164,10.0,10.04,DICIEMBRE 25,pellet 420,granel,-40.0


In [132]:
plot_statistical_strip

<function core.viz.plot_statistical_strip(df: pandas.core.frame.DataFrame, x_col: str, y_col: str, category_order: Optional[List[str]] = None, color_map: Optional[Dict[str, str]] = None, show_boxplot: bool = True, show_mean_ci: bool = True, show_global_mean: bool = True, show_counts: bool = True, title: str = '', x_title: Optional[str] = None, y_title: Optional[str] = None, point_opacity: float = 0.6, point_size: int = 7, box_opacity: float = 0.25, height: int = 600, width: int = 1000, filename: Optional[str] = None) -> plotly.graph_objs._figure.Figure>

In [138]:
f = plot_statistical_strip(
    rev_by_month_diff_tons[rev_by_month_diff_tons["diff_kilos"].between(-1000, 1000)],
    x_col="pellet_hoja_sackoff_mes",
    y_col="diff_kilos",
    #title="<b>Diferencia de toneladas en hoja de Rendimiento y Sackoff por pellets</b>"
    x_title="Pellets rotulo de hoja Sackoff (mes)",
    y_title="Diferencia de toneladas (kg)",
)
f.show()
s3.save_plotly_html(f, "subplots_lotes_faltantes_pellets.html")

In [140]:
f = plot_statistical_strip(
    rev_by_month_diff_tons[rev_by_month_diff_tons["diff_kilos"].between(-1000, 1000)],
    x_col="month",
    y_col="diff_kilos",
     x_title="Pellets rotulo de hoja Sackoff (mes)",
    y_title="Diferencia de toneladas (kg)",
    
)
f.show()
s3.save_plotly_html(f, "subplots_lotes_faltantes_mes.html")
#tienen a pesar mas hacial el sackofff

In [123]:
plot_statistical_strip(
    rev_by_month_both[(rev_by_month_both["pellet_hoja_sackoff_mes"].isin(pel)) &(rev_by_month_both["diff_kilos"].between(-1000, 1000))],
    x_col="pellet_hoja_sackoff_mes",
    y_col="diff_kilos",
    
)
#tienen a pesar mas hacial el sackofff

In [137]:
rev_by_month_both

Unnamed: 0,lot,date_hoja_rendimiento,date_hoja_sackoff_mes,batch_hoja_rendimiento,batch_hoja_sackoff_mes,pellet_hoja_rendimiento,pellet_hoja_sackoff_mes,product_hoja_rendimiento,product_hoja_sackoff_mes,tons_hoja_rendimiento,tons_hoja_sackoff_mes,actual_tons,sheet_name,_merge,diff_tons,month
362,1011100,2025-03-04,2025-03-04,1.0,1.0,pellet 520,pellet 520,PRELEVANTE BR,MAQUILA BR PRELEVANTE EB,8.0,8.04,8.72,MARZO 25,both,-0.04,2025-03
363,1011103,2025-03-03,2025-03-04,1.0,1.0,pellet 520,pellet 520,TERNERAS ELITE,NUTRINOR LECHE PELLET ELITE,16.0,16.00,16.08,MARZO 25,both,0.00,2025-03
364,1011108,2025-03-03,2025-03-04,2.0,1.0,pellet 520,harina,DESARROLLO SUPERCERDO GRANEL,GANADERIA LEVANTE PLUS,25.0,25.10,24.71,MARZO 25,both,-0.10,2025-03
365,1011109,2025-03-04,2025-03-04,1.0,1.0,pellet 520,harina,MAGRO SUPERCERDO GRANEL,NUTRINOR ALTIPLANO EX ALTA ENER HARINA,35.0,35.00,34.25,MARZO 25,both,0.00,2025-03
366,1011112,2025-03-04,2025-03-04,1.0,1.0,pellet 420,pellet 420,ALTA ENERGIA CP,NUTRINOR ALTA ENERGIA CP PELLET,32.0,32.40,31.96,MARZO 25,both,-0.40,2025-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4480,4861131,2025-11-29,2025-12-01,1.0,2.0,pellet 520,"granel, pellet 520",NUTRIMAGRO GRANEL,"MAQUILA NUTRI-MAGRO GRANEL, MAQUILA NUTRI-MAGRO",12.0,36.00,35.14,"DICIEMBRE 25, DIC 24",both,-24.00,2025-11
4483,4861164,2025-11-29,2025-12-01,1.0,1.0,pellet 420,granel,SUPERLECHE GRANEL,NUTRINOR SUPER LECHE PELLET GRANEL,10.0,10.04,9.76,DICIEMBRE 25,both,-0.04,2025-11
4484,4861166,2025-12-01,2025-12-01,1.0,2.0,pellet 420,pellet 420,LECHE TOP,NUTRINOR LECHE TOP,16.0,32.12,32.00,"DICIEMBRE 25, NOV 24",both,-16.12,2025-12
4486,486153,2025-11-30,2025-12-01,1.0,1.0,pellet 520,granel,MARRANAS GESTACION EB GRANEL,MAQUILA GESTACION NORMAL SUPERCERDO GRANEL,8.0,8.00,7.63,DICIEMBRE 25,both,0.00,2025-11


In [136]:
plot_statistical_strip(
    rev_by_month_both[(rev_by_month_both["diff_kilos"].between(-1000, 1000))],
    x_col="month",
    y_col="diff_kilos",
    
)

KeyError: 'diff_kilos'

In [125]:
for pl in pel:
    f = plot_statistical_strip(
    rev_by_month_both[(rev_by_month_both["pellet_hoja_sackoff_mes"].isin([pl])) &(rev_by_month_both["diff_kilos"].between(-1000, 1000))],
    x_col="month",
    y_col="diff_kilos",
    title=f"Pellet {pl}")
    f.show()

In [126]:
rev_by_month_both = rev_by_month[rev_by_month["_merge"] == 'both'][ord_col]
rev_by_month_both["diff_tons"] = rev_by_month_both["tons_hoja_rendimiento"] - rev_by_month_both["tons_hoja_sackoff_mes"]
rev_by_month_both["month"] = pd.to_datetime(rev_by_month_both["date_hoja_rendimiento"]).dt.strftime('%Y-%m')


grp_rev_mes = rev_by_month_both.groupby(["month"]).agg(
    tons_hoja_rendimiento=("tons_hoja_rendimiento", "sum"),
    tons_hoja_sackoff_mes=("tons_hoja_sackoff_mes", "sum"),
    actual_tons=("actual_tons", "sum")
).reset_index()

grp_rev_mes["diff_tons_fuentes"] = grp_rev_mes["tons_hoja_rendimiento"] - grp_rev_mes["tons_hoja_sackoff_mes"]
diff1 = grp_rev_mes["tons_hoja_rendimiento"] - grp_rev_mes["actual_tons"]
grp_rev_mes["sackoff_hoja_rendimiento"] = diff1/grp_rev_mes["tons_hoja_rendimiento"]*100

diff2 = grp_rev_mes["tons_hoja_sackoff_mes"] - grp_rev_mes["actual_tons"]
grp_rev_mes["sackoff_hoja_sackoff_mes"] = diff2/grp_rev_mes["tons_hoja_sackoff_mes"]*100
grp_rev_mes

Unnamed: 0,month,tons_hoja_rendimiento,tons_hoja_sackoff_mes,actual_tons,diff_tons_fuentes,sackoff_hoja_rendimiento,sackoff_hoja_sackoff_mes
0,2025-03,6028.0,6182.8694,6111.958,-154.8694,-1.3928,1.146901
1,2025-04,6656.0,6908.0788,6814.27,-252.0788,-2.377855,1.357958
2,2025-05,5941.0,7295.3568,7190.79,-1354.3568,-21.036694,1.433334
3,2025-06,5713.16,5857.0496,5770.36,-143.8896,-1.001197,1.48009
4,2025-07,6057.96,8028.278,7915.25,-1970.318,-30.658671,1.407874
5,2025-08,5993.4,12122.6256,11949.2,-6129.2256,-99.372643,1.430594
6,2025-09,5877.0,7052.0118,6958.2796,-1175.0118,-18.398496,1.329155
7,2025-10,1392.0,2641.441,2596.408,-1249.441,-86.523563,1.704865
8,2025-11,6481.6,8407.1052,8308.9848,-1925.5052,-28.193421,1.167113
9,2025-12,32.0,48.12,48.08,-16.12,-50.25,0.083126


In [98]:
rev_by_month_both = rev_by_month[rev_by_month["_merge"] == 'both'][ord_col]
rev_by_month_both["diff_tons"] = rev_by_month_both["tons_hoja_rendimiento"] - rev_by_month_both["tons_hoja_sackoff_mes"]

rev_by_month_both = rev_by_month_both[rev_by_month_both["diff_tons"] ==0]
rev_by_month_both["month"] = pd.to_datetime(rev_by_month_both["date_hoja_rendimiento"]).dt.strftime('%Y-%m')


grp_rev_mes = rev_by_month_both.groupby(["month"]).agg(
    tons_hoja_rendimiento=("tons_hoja_rendimiento", "sum"),
    tons_hoja_sackoff_mes=("tons_hoja_sackoff_mes", "sum"),
    actual_tons=("actual_tons", "sum")
).reset_index()

grp_rev_mes["diff_tons_fuentes"] = grp_rev_mes["tons_hoja_rendimiento"] - grp_rev_mes["tons_hoja_sackoff_mes"]
diff1 = grp_rev_mes["tons_hoja_rendimiento"] - grp_rev_mes["actual_tons"]
grp_rev_mes["sackoff_hoja_rendimiento"] = diff1/grp_rev_mes["tons_hoja_rendimiento"]*100

diff2 = grp_rev_mes["tons_hoja_sackoff_mes"] - grp_rev_mes["actual_tons"]
grp_rev_mes["sackoff_hoja_sackoff_mes"] = diff2/grp_rev_mes["tons_hoja_sackoff_mes"]*100
grp_rev_mes

Unnamed: 0,month,tons_hoja_rendimiento,tons_hoja_sackoff_mes,actual_tons,diff_tons_fuentes,sackoff_hoja_rendimiento,sackoff_hoja_sackoff_mes
0,2025-03,2753.0,2753.0,2724.868,0.0,1.021867,1.021867
1,2025-04,3238.0,3238.0,3197.8,0.0,1.241507,1.241507
2,2025-05,3105.0,3105.0,3060.73,0.0,1.425765,1.425765
3,2025-06,3068.0,3068.0,3019.91,0.0,1.567471,1.567471
4,2025-07,3022.0,3022.0,2986.43,0.0,1.177035,1.177035
5,2025-08,556.0,556.0,546.46,0.0,1.715827,1.715827
6,2025-09,2886.0,2886.0,2852.2528,0.0,1.169342,1.169342
7,2025-10,472.0,472.0,465.02,0.0,1.478814,1.478814
8,2025-11,3485.0,3485.0,3453.3,0.0,0.909613,0.909613
9,2025-12,16.0,16.0,16.08,0.0,-0.5,-0.5


# Particulardades del sackoff