In [27]:

import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
import matplotlib.pyplot as plt

from pathlib import Path
from core.utils import ensure_dirs
from core.utils import read_excel_from_s3
import ipynbname

import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [28]:
# Folders and scripts (notebook and .yaml) must share the same name:
# the notebook's own name is what selects the data and image directories below.

notebook_name = ipynbname.name()

ROOT_DATA = Path("..", "data", notebook_name)
ROOT_IMAGEN = Path("..", "images", notebook_name)
ensure_dirs(ROOT_DATA, ROOT_IMAGEN)
print(f"Carpetas verificadas/creadas:\n- {ROOT_DATA.resolve()}\n- {ROOT_IMAGEN.resolve()}")

Carpetas verificadas/creadas:
- /Users/juandavidrincon/Documents/galileo/data/raza_junio
- /Users/juandavidrincon/Documents/galileo/images/raza_junio


## Funciones de graficación

In [159]:

def plot_4metrics_by_machine_plotly(
    df_base: pd.DataFrame,
    metrics: dict,
    group_col: str,
    output_path: str,
    bar_color: str = "#1C8074",
    bar_height: float = 0.35
):
    """
    Build a 2x2 grid of horizontal bar charts with the mean of each metric per group.

    Every entry of ``metrics`` gets its own panel, filled left-to-right and
    top-to-bottom. Values are averaged per ``group_col`` only, with no further
    split (e.g. by Adiflow). The figure is written to ``output_path`` as HTML,
    displayed, and a confirmation line is printed.

    Args:
        df_base: Frame containing ``group_col`` and every key of ``metrics``.
        metrics: Mapping ``column name -> display title``; at most four entries.
        group_col: Column whose values label the bars; also used as the y-axis
            title of the left-hand panels.
        output_path: Destination of the HTML export.
        bar_color: Fill colour shared by all bars.
        bar_height: Not used by the current implementation; kept so existing
            calls that pass it keep working.

    Raises:
        ValueError: If ``metrics`` has more than four entries (the grid is 2x2).
    """
    n_rows, n_cols = 2, 2
    max_panels = n_rows * n_cols
    # Fail early with a clear message instead of a KeyError half-way through the figure.
    if len(metrics) > max_panels:
        raise ValueError(
            f"metrics has {len(metrics)} entries but the grid only has {max_panels} panels"
        )

    fig = make_subplots(
        rows=n_rows, cols=n_cols,
        subplot_titles=list(metrics.values()),
        shared_yaxes=False,
        horizontal_spacing=0.12,
        vertical_spacing=0.15
    )

    for i, (metric_col, pretty_name) in enumerate(metrics.items()):
        # Panel position: 0 -> (1, 1), 1 -> (1, 2), 2 -> (2, 1), 3 -> (2, 2).
        row, col = (offset + 1 for offset in divmod(i, n_cols))

        df_plot = (
            df_base.groupby(group_col, as_index=False)[metric_col]
                   .mean()
                   .sort_values(group_col)
        )

        machines = df_plot[group_col].tolist()
        values   = df_plot[metric_col].tolist()

        fig.add_trace(
            go.Bar(
                x=values,
                y=machines,
                orientation='h',
                marker_color=bar_color,
                text=[f"{v:.2f}" for v in values],
                textposition='outside',
                insidetextanchor='start',
                textfont=dict(size=12, color="black"),
                name=pretty_name,
                showlegend=False
            ),
            row=row, col=col
        )

        # Only the left column carries the y-axis title to avoid repeating it.
        fig.update_yaxes(
            title_text=group_col if col == 1 else "",
            tickfont=dict(size=11, color='black'),
            title_font=dict(size=14, color='black'),
            row=row, col=col
        )

        # The x-axis title is the first word of the display title
        # (e.g. "Durabilidad (%)" -> "Durabilidad"); empty titles give an empty label.
        title_words = pretty_name.split()
        fig.update_xaxes(
            title_text=title_words[0] if title_words else "",
            tickfont=dict(size=11, color='black'),
            title_font=dict(size=14, color='black'),
            row=row, col=col
        )

    # Global style: subplot titles are stored as layout annotations.
    for ann in fig['layout']['annotations']:
        ann['font'] = dict(size=16, color="black")

    fig.update_layout(
        height=500,
        width=1000,
        barmode='group',
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=60, r=30, t=60, b=40)
    )

    fig.write_html(output_path)
    fig.show()
    print(f"✅ Gráfico guardado como HTML en: {output_path}")



def plot_comparative_moisture_change_by_machine_plotly(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    group_col: str,
    value_cols: Tuple[str, str],
    titles: Tuple[str, str],
    output_path: str,
    label_y: str = "Máquina Empleada",
    label_x: str = "Humedad (%)",
    colors: Tuple[str, str] = ("#94AF92", "#1C8074")
):
    """
    Compare the mean initial and final value per group for two datasets, side by side.

    Each dataset gets one panel with two horizontal bars per group (initial and
    final mean). Next to every group a label shows the absolute difference
    ``final - initial`` with one decimal, followed by ▲ for an increase or ▼ for a
    decrease. An exact zero gets no arrow, and groups whose means are missing get
    no label. The figure is written to ``output_path`` as HTML, displayed, and a
    confirmation line is printed.

    Args:
        df1: Data for the left panel.
        df2: Data for the right panel.
        group_col: Column used to group rows (one pair of bars per value).
        value_cols: ``(initial_column, final_column)``.
        titles: ``(left_title, right_title)`` for the two panels.
        output_path: Destination of the HTML export.
        label_y: Title of the y axes.
        label_x: Title of the x axes.
        colors: ``(initial_colour, final_colour)``.
    """
    initial_col, final_col = value_cols[0], value_cols[1]

    def prepare_df(df):
        # Mean per group, plus the change between final and initial means.
        df_plot = df.groupby(group_col)[list(value_cols)].mean()
        df_plot["Cambio_pct"] = df_plot[final_col] - df_plot[initial_col]
        return df_plot

    df1_plot = prepare_df(df1)
    df2_plot = prepare_df(df2)

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=titles,
        shared_yaxes=False,
        horizontal_spacing=0.15
    )

    for col, df_plot in enumerate((df1_plot, df2_plot), start=1):
        labels = df_plot.index.tolist()

        # Trace order: the final value is added first, then the initial one.
        fig.add_trace(
            go.Bar(
                x=df_plot[final_col],
                y=labels,
                orientation='h',
                name=final_col,
                marker_color=colors[1],
                showlegend=(col == 1),  # one legend entry is enough for both panels
                offsetgroup='final'
            ),
            row=1, col=col
        )

        fig.add_trace(
            go.Bar(
                x=df_plot[initial_col],
                y=labels,
                orientation='h',
                name=initial_col,
                marker_color=colors[0],
                showlegend=(col == 1),
                offsetgroup='inicial'
            ),
            row=1, col=col
        )

        for label, final_val, diff in zip(labels, df_plot[final_col], df_plot["Cambio_pct"]):
            # Without both means there is neither a position nor a change to report.
            if pd.isna(final_val) or pd.isna(diff):
                continue

            if diff > 0:
                direction = "▲"
            elif diff < 0:
                direction = "▼"
            else:
                direction = ""  # no change: an arrow would be misleading
            text = f"{abs(diff):.1f}% {direction}".rstrip()

            fig.add_annotation(
                x=final_val + 0.15,  # small gap to the right of the final bar
                y=label,
                text=text,
                font=dict(size=14, color="black"),
                showarrow=False,
                xanchor="left",
                yanchor="middle",
                row=1, col=col
            )

    fig.update_layout(
        height=400,
        width=1200,
        barmode='group',
        plot_bgcolor='white',
        paper_bgcolor='white',
        title_font=dict(size=22, color="black"),
        margin=dict(l=80, r=40, t=60, b=40),
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='center',
            x=0.5,
            font=dict(size=14, color="black")
        )
    )

    fig.update_xaxes(
        title_text=label_x,
        title_font=dict(size=16, color='black'),
        tickfont=dict(size=13, color='black'),
        color='black'
    )
    fig.update_yaxes(
        title_text=label_y,
        title_font=dict(size=16, color='black'),
        tickfont=dict(size=13, color='black'),
        color='black'
    )
    # Applies to every layout annotation: subplot titles and the change labels
    # added above, so the final font size of those labels is 15.
    for annotation in fig['layout']['annotations']:
        annotation['font'] = dict(color='black', size=15)

    fig.write_html(output_path)
    fig.show()
    print(f"✅ Gráfico guardado como HTML interactivo en: {output_path}")


def plot_aw_by_machine_comparison_plotly(
    df1: pd.DataFrame,
    df2: pd.DataFrame,
    group_col: str,
    value_col: str,
    titles: Tuple[str, str],
    output_path: str,
    bar_color: str = "#1C8074",
    x_limit: Tuple[float, float] = (0., 0.8),
    x_title: str = ""
):
    """
    Plot the mean of one column per group for two datasets in side-by-side panels.

    Each panel shows one horizontal bar per group, labelled with the value
    formatted to two decimals. The figure is written to ``output_path`` as HTML,
    displayed, and a confirmation line is printed.

    Args:
        df1: Data for the left panel.
        df2: Data for the right panel.
        group_col: Column used to group rows; also the y-axis title.
        value_col: Column whose mean is plotted.
        titles: ``(left_title, right_title)`` for the two panels.
        output_path: Destination of the HTML export.
        bar_color: Fill colour of the bars.
        x_limit: ``(min, max)`` range applied to both x axes.
        x_title: Title of the x axes. When empty, ``value_col`` is used so the
            axis always names the quantity actually plotted.
    """

    def prepare_df(df):
        return df.groupby(group_col, as_index=False)[value_col].mean().sort_values(by=group_col)

    df1_plot = prepare_df(df1)
    df2_plot = prepare_df(df2)

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=titles,
        shared_yaxes=False,
        horizontal_spacing=0.15
    )

    for col, df_plot in enumerate([df1_plot, df2_plot], start=1):
        labels = df_plot[group_col].tolist()
        values = df_plot[value_col].tolist()

        fig.add_trace(
            go.Bar(
                x=values,
                y=labels,
                orientation='h',
                marker_color=bar_color,
                text=[f"{v:.2f}" for v in values],
                textposition='outside',
                insidetextanchor='start',
                textfont=dict(size=12, color="black"),
                name=value_col,
                showlegend=False
            ),
            row=1, col=col
        )

    # Figure-level style
    fig.update_layout(
        height=400,
        width=1200,
        plot_bgcolor='white',
        paper_bgcolor='white',
        barmode='group',
        margin=dict(l=80, r=40, t=60, b=40),
        title_font=dict(size=22, color="black"),
    )

    # Subplot titles (stored as layout annotations) in black
    for annotation in fig['layout']['annotations']:
        annotation['font'] = dict(color='black', size=18)

    fig.update_xaxes(
        range=x_limit,
        # The function is also used for columns other than AW, so the label
        # follows the plotted column unless the caller overrides it.
        title_text=x_title or value_col,
        title_font=dict(size=16, color="black"),
        tickfont=dict(size=13, color="black"),
        color="black"
    )

    fig.update_yaxes(
        title_text=group_col,
        title_font=dict(size=16, color="black"),
        tickfont=dict(size=13, color="black"),
        color="black"
    )

    fig.write_html(output_path)
    fig.show()
    print(f"✅ Gráfico guardado como HTML en: {output_path}")





def plot_metricas_por_maquina(
    df: pd.DataFrame,
    columnas_metricas: List[str],
    columna_maquina: str = "Maquina Empleada",
    columna_informe: str = "informe",
    colores: dict = None,
    titulo: str = "",
    name=""

) -> go.Figure:
    """
    Draw one bar panel per machine comparing metric values across reports.

    Within each panel the x axis holds the values of ``columna_informe`` and the
    bars show each column of ``columnas_metricas``, coloured by report.

    Args:
        df: Frame with one row per machine/report combination.
        columnas_metricas: Metric columns to plot; the first one is also used as
            the y-axis title.
        columna_maquina: Column identifying the machine (one panel per value).
        columna_informe: Column identifying the report (x-axis categories).
        colores: Mapping ``report -> colour``. Defaults to
            ``{"mayo": "#1C8074", "junio": "#94AF92"}``; reports missing from
            the mapping are drawn in ``#888888``.
        titulo: Figure title.
        name: File stem of the HTML export, written under the notebook-level
            ``ROOT_IMAGEN`` directory. When empty, nothing is written.

    Returns:
        The assembled plotly figure.

    Raises:
        ValueError: If ``columnas_metricas`` is empty.
    """
    if not columnas_metricas:
        raise ValueError("columnas_metricas must contain at least one column name")
    # Build the default mapping per call instead of sharing one mutable default.
    if colores is None:
        colores = {"mayo": "#1C8074", "junio": "#94AF92"}

    maquinas = df[columna_maquina].unique()
    fig = make_subplots(
        rows=1,
        cols=len(maquinas),
        subplot_titles=[str(maquina) for maquina in maquinas],
        shared_yaxes=True
    )

    for idx, maquina in enumerate(maquinas, start=1):
        df_maquina = df[df[columna_maquina] == maquina]
        bar_colors = [colores.get(informe, "#888888") for informe in df_maquina[columna_informe]]
        for metrica in columnas_metricas:
            fig.add_trace(
                go.Bar(
                    x=df_maquina[columna_informe],
                    y=df_maquina[metrica],
                    name=metrica,
                    marker_color=bar_colors,
                    showlegend=(idx == 1),  # avoid one legend entry per panel
                    text=[f"{v:.2f}" for v in df_maquina[metrica]],
                    textposition="outside",
                    textfont_color="black"
                ),
                row=1, col=idx
            )

    fig.update_layout(
        title=dict(text=titulo, font=dict(color="black"), x=0.5),
        height=500,
        width=800,
        barmode="group",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="white",
        font=dict(color="black"),
        legend=dict(font=dict(color="black"))
    )

    fig.update_yaxes(
        title_text=columnas_metricas[0],
        title_font=dict(color="black"),
        tickfont=dict(color="black")
    )

    # Same styling for the x axis of every panel.
    fig.update_xaxes(
        title_font=dict(color="black"),
        tickfont=dict(color="black")
    )

    # Skip the export when no file stem was given; otherwise a file literally
    # named ".html" would be created.
    if name:
        fig.write_html(f"{ROOT_IMAGEN}/{name}.html")

    return fig



## Business Functions

In [160]:
def clear_initial_datraframe(raw_base: pd.DataFrame)-> pd.DataFrame:
    """
    Clean the raw production sheet and return a new, analysis-ready frame.

    Steps:
      1. Strip surrounding whitespace from column labels.
      2. Keep only rows that have a product, a parseable date and a machine.
      3. Add ``Adiflow``: "Sin Adiflow" where ``Con ADIFLOW`` is null,
         "Con Adiflow" otherwise.
      4. Convert text-typed measurement columns to numbers, accepting a comma as
         the decimal separator; unparseable values become NaN.
      5. Add ``sackoff_cal``: percentage change from initial to final kilos.

    The input frame is not modified.

    Args:
        raw_base: Raw sheet as read from Excel.

    Returns:
        A cleaned copy of ``raw_base`` with the extra columns ``Adiflow`` and
        ``sackoff_cal``.

    Raises:
        KeyError: If an expected column is missing after stripping labels.
    """
    cols = [
        "Carga (puntos)", "Temperatura (°C)", "Presión (psi)",
        "% Consistencia",
        "%Humedad inicial",
        "% Humedad Final",
        "AW", "Kilos Mp Inicial", "Kilos Mp- Final", "%Desviación"
    ]
    required = ["Producto", "Fecha", "Con ADIFLOW", "Maquina Empleada", *cols]

    # rename() returns a new frame, so the caller's column labels stay untouched.
    df = raw_base.rename(columns=lambda c: c.strip() if isinstance(c, str) else c)

    missing = [c for c in required if c not in df.columns]
    if missing:
        raise KeyError(f"Missing expected column(s): {missing}")

    # Explicit copies after filtering avoid SettingWithCopyWarning on the
    # column assignments that follow.
    df = df[df["Producto"].notnull()].copy()
    df["Fecha"] = pd.to_datetime(df["Fecha"], errors='coerce')
    df = df[df["Fecha"].notnull() & df["Maquina Empleada"].notnull()].copy()
    df["Adiflow"] = np.where(df["Con ADIFLOW"].isnull(), "Sin Adiflow", "Con Adiflow")

    for col in cols:
        # Columns already numeric are left alone; only text columns are parsed.
        if df[col].dtype == 'object' or pd.api.types.is_string_dtype(df[col]):
            as_text = df[col].astype(str).str.replace(",", ".", regex=False)
            df[col] = pd.to_numeric(as_text, errors="coerce")

    # Computed sackoff: relative change in kilos, in percent.
    df["sackoff_cal"] = (df["Kilos Mp- Final"] - df["Kilos Mp Inicial"]) * 100 / df["Kilos Mp Inicial"]
    return df



def build_group(data, cols=('Maquina Empleada', 'Adiflow')):
    """
    Aggregate the cleaned production rows into one summary row per group.

    Per group the result holds: total initial/final kilos expressed in tonnes,
    sackoff (percentage change between total final and total initial kilos),
    the mean of AW, initial/final moisture, temperature, pressure and load, and
    the median of consistency (reported as durability). Values are rounded to
    two decimals before the kilos are converted to tonnes.

    Args:
        data: Cleaned frame containing the grouping and measurement columns.
        cols: Column name or sequence of column names to group by.

    Returns:
        A new frame with the grouping columns followed by the report columns
        ("Ton Iniciales", "Ton Finales", "Sackoff (%)", ...).
    """
    # Accept any sequence (list, tuple) or a single name without mutating the argument.
    group_cols = [cols] if isinstance(cols, str) else list(cols)

    summary = data.groupby(group_cols).agg(
        carga_prom=("Carga (puntos)", "mean"),
        temp_prom=("Temperatura (°C)", "mean"),
        presion_prom=("Presión (psi)", "mean"),
        durabilidad_prom=("% Consistencia", "median"),
        hum_ini=("%Humedad inicial", "mean"),
        hum_final=("% Humedad Final", "mean"),
        aw_prom=("AW", "mean"),
        kilos_iniciales=("Kilos Mp Inicial", "sum"),
        kilos_finales=("Kilos Mp- Final", "sum"),
    ).reset_index()

    # Sackoff is computed from the unrounded totals and rounded afterwards.
    summary["sackoff"] = (
        (summary["kilos_finales"] - summary["kilos_iniciales"]) / summary["kilos_iniciales"] * 100
    )

    report_cols = [
        "kilos_iniciales", "kilos_finales", "sackoff", "aw_prom",
        "hum_ini", "hum_final", "temp_prom", "presion_prom",
        "carga_prom", "durabilidad_prom",
    ]
    report = summary[group_cols + report_cols].round(2)

    # Kilos -> tonnes, applied after rounding so tonnes keep the extra precision.
    report["kilos_finales"] = report["kilos_finales"] / 1000
    report["kilos_iniciales"] = report["kilos_iniciales"] / 1000

    return report.rename(columns={
        "kilos_finales": "Ton Finales",
        "kilos_iniciales": "Ton Iniciales",
        "sackoff": "Sackoff (%)",
        "aw_prom": "AW Prom",
        "hum_ini": "Hum Inicial Prom (%)",
        "hum_final": "Hum Final Prom (%)",
        "temp_prom": "Temp Prom (°C)",
        "presion_prom": "Presion Prom (Psi)",
        "carga_prom": "Carga Prom (Puntos)",
        "durabilidad_prom": "Durabilidad Prom (%)",
    })

## Lectura

In [161]:
# Load the June workbook (sheet "Adiflow") from S3.
# skiprows=1 drops the first spreadsheet row before the header is read
# (presumably a banner row above the real header; TODO confirm).
# NOTE(review): the object key contains a ")" right before ".xlsx"; confirm it
# matches the actual file name in the bucket.
raw_junio = read_excel_from_s3('s3://galileo-c4e9a2f1/raza/Estadistico Adiflow Junio).xlsx', sheet_name='Adiflow', skiprows=1)

# Load the May workbook; here the data lives in the sheet "Adiflow Peletizado".
raw_mayo = read_excel_from_s3(
    path="s3://galileo-c4e9a2f1/raza/Estadistico Adiflow RAZA.xlsx",
     sheet_name="Adiflow Peletizado", skiprows=1)

In [162]:
# Machines of the pelletizing line; every other machine goes to the "ext" frames.
PELLET_MACHINES = ['Pelet 1', 'Pelet 2 y 3', 'Pelet 4']

# June: clean once, then split with a single boolean mask.
df = clear_initial_datraframe(raw_junio)
is_pellet = df["Maquina Empleada"].isin(PELLET_MACHINES)
df_pel = df[is_pellet]
df_ext = df[~is_pellet]

# May: same cleaning and split.
df_mayo = clear_initial_datraframe(raw_mayo)
is_pellet_mayo = df_mayo["Maquina Empleada"].isin(PELLET_MACHINES)
df_pel_mayo = df_mayo[is_pellet_mayo]
df_ext_mayo = df_mayo[~is_pellet_mayo]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [163]:
# June pellet summary. The CSV is exported before tagging the report,
# so the file does not contain the "informe" column.
df_pel_group = build_group(df_pel)
df_pel_group.to_csv(ROOT_DATA / "data_pel_group_dep.csv", index=False)
df_pel_group = df_pel_group.assign(informe='junio')
df_pel_group

Unnamed: 0,Maquina Empleada,Adiflow,Ton Iniciales,Ton Finales,Sackoff (%),AW Prom,Hum Inicial Prom (%),Hum Final Prom (%),Temp Prom (°C),Presion Prom (Psi),Carga Prom (Puntos),Durabilidad Prom (%),informe
0,Pelet 1,Con Adiflow,852.889,853.398,0.06,0.67,10.46,11.09,68.94,44.95,18.67,95.1,junio
1,Pelet 2 y 3,Con Adiflow,1056.561,1054.565,-0.19,0.68,11.11,11.49,66.63,68.4,11.69,94.3,junio
2,Pelet 4,Con Adiflow,506.638,506.612,-0.01,0.68,10.91,11.35,68.89,45.37,9.0,94.25,junio


In [164]:
# May pellet summary, tagged with its report; only the "Con Adiflow" rows are
# kept for the month-to-month comparison.
df_pel_group_mayo = build_group(df_pel_mayo).assign(informe='mayo')
df_pel_group_mayo_adiflow = df_pel_group_mayo.loc[
    df_pel_group_mayo["Adiflow"].eq('Con Adiflow')
]

In [165]:
# Stack the May ("Con Adiflow" only) and June pellet summaries; the "informe"
# column tells the months apart. Each frame keeps its own index labels
# (no ignore_index).
df_time_pel = pd.concat([df_pel_group_mayo_adiflow, df_pel_group])


In [166]:
# Try the plotting helper on the rebuilt May + June pellet table:
# one HTML file per metric, named after the file stem on the right.
pel_plots = [
    ("AW Prom", "aw_comparativa_pel"),
    ("Hum Inicial Prom (%)", "humedad_inicial_pel"),
    ("Hum Final Prom (%)", "humedad_final_pel"),
    ("Temp Prom (°C)", "temp_pel"),
    ("Durabilidad Prom (%)", "pdi_pel"),
    ("Sackoff (%)", "sackoff_pel"),
]
for pel_metric, pel_file_stem in pel_plots:
    # `fig` ends up holding the last figure (sackoff), as before.
    fig = plot_metricas_por_maquina(df_time_pel, columnas_metricas=[pel_metric], name=pel_file_stem)


In [167]:
#build_group(df_pel, cols=['Maquina Empleada', 'Adiflow','Producto'])

In [168]:
# June extruder summary restricted to the columns reported for extrusion.
# .copy() makes the column subset an independent frame, so adding "informe"
# below does not trigger SettingWithCopyWarning.
df_ext_group = build_group(df_ext)
df_ext_group = df_ext_group[['Maquina Empleada', 'Adiflow', 'Ton Iniciales',
              'Ton Finales','Sackoff (%)', 'AW Prom',
              'Hum Inicial Prom (%)','Hum Final Prom (%)']].copy()
# Exported before tagging, so the CSV has no "informe" column.
df_ext_group.to_csv(f"{ROOT_DATA}/data_extruder_group_dep.csv", index=False)
df_ext_group["informe"] = 'junio'

In [176]:
# May extruder summary, tagged with its report. The comparison subset keeps the
# "Con Adiflow" rows and leaves out machine 'Extruder 621'.
df_ext_group_mayo = build_group(df_ext_mayo).assign(informe='mayo')
df_ext_group_mayo_adoflow = df_ext_group_mayo.loc[
    df_ext_group_mayo["Adiflow"].eq('Con Adiflow')
    & df_ext_group_mayo["Maquina Empleada"].ne('Extruder 621')
]
df_ext_group_mayo_adoflow

Unnamed: 0,Maquina Empleada,Adiflow,Ton Iniciales,Ton Finales,Sackoff (%),AW Prom,Hum Inicial Prom (%),Hum Final Prom (%),Temp Prom (°C),Presion Prom (Psi),Carga Prom (Puntos),Durabilidad Prom (%),informe
0,Extruder 1020,Con Adiflow,2160.005,2075.688,-3.9,0.51,10.14,7.89,,,,,mayo
2,Extruder 620,Con Adiflow,1112.876,1075.328,-3.37,0.53,9.94,8.37,,,9.0,,mayo


In [177]:
# Stack the May and June extruder summaries; the "informe" column tells the
# months apart. Columns present in only one of the frames are filled with NaN
# for the other.
df_time_ext = pd.concat([df_ext_group_mayo_adoflow, df_ext_group])


In [178]:
# Try the plotting helper on the rebuilt May + June extruder table:
# one HTML file per metric, named after the file stem on the right.
# NOTE(review): "Temp Prom (°C)" and "Durabilidad Prom (%)" are not among the
# columns kept for the June extruder summary, so those two charts can only show
# May values; confirm they are still wanted.
ext_plots = [
    ("AW Prom", "aw_comparativa_ext"),
    ("Hum Inicial Prom (%)", "humedad_inicial_ext"),
    ("Hum Final Prom (%)", "humedad_final_ext"),
    ("Temp Prom (°C)", "temp_ext"),
    ("Durabilidad Prom (%)", "pdi_ext"),
    ("Sackoff (%)", "sackoff_ext"),
]
for ext_metric, ext_file_stem in ext_plots:
    fig = plot_metricas_por_maquina(df_time_ext, columnas_metricas=[ext_metric], name=ext_file_stem)
# Display the last figure built (sackoff).
fig


In [179]:
# Mean initial vs. final moisture per machine, computed from the cleaned
# row-level June frames: extrusion machines in the first panel, pelletizing
# machines in the second.
plot_comparative_moisture_change_by_machine_plotly(
    df1=df_ext,
    df2=df_pel,
    group_col='Maquina Empleada',
    value_cols=("%Humedad inicial", "% Humedad Final"),
    titles=("Máquinas de Extrusión", "Máquinas de Peletización"),
    output_path=f"{ROOT_IMAGEN}/comparativo_humedad.html"
)


✅ Gráfico guardado como HTML interactivo en: ../images/raza_junio/comparativo_humedad.html


In [180]:
# Mean AW per machine from the June grouped summaries (extrusion vs. pelletizing),
# using the helper's default x range.
plot_aw_by_machine_comparison_plotly(
    df1=df_ext_group,
    df2=df_pel_group,
    group_col="Maquina Empleada",
    value_col="AW Prom",
    titles=("Extrusión", "Peletización"),
    output_path=f"{ROOT_IMAGEN}/comparativo_aw_por_maquina.html"
)


✅ Gráfico guardado como HTML en: ../images/raza_junio/comparativo_aw_por_maquina.html


In [181]:
# Same comparison helper, applied to sackoff instead of AW. The x range is
# widened to (-5.5, 1) because sackoff values can be negative.
plot_aw_by_machine_comparison_plotly(
    df1=df_ext_group,
    df2=df_pel_group,
    group_col="Maquina Empleada",
    value_col="Sackoff (%)",
    titles=("Extrusión", "Peletización"),
    x_limit=(-5.5,1),
    output_path=f"{ROOT_IMAGEN}/comparativos_sackoff_por_maquina.html"
)

✅ Gráfico guardado como HTML en: ../images/raza_junio/comparativos_sackoff_por_maquina.html


In [182]:
# Summary column -> panel title for the 2x2 process-metrics figure
# (one panel per entry, in this order).
METRICS = {
    "Durabilidad Prom (%)": "Durabilidad (%)",
    "Temp Prom (°C)":        "Temperatura (°C)",
    "Carga Prom (Puntos)":   "Carga (puntos)",
    "Presion Prom (Psi)":    "Presión (psi)",
}

# Plot the four process metrics per pellet machine from the June summary.
plot_4metrics_by_machine_plotly(
    df_base=df_pel_group,  # already-grouped dataframe
    metrics=METRICS,
    group_col="Maquina Empleada",
    output_path=f"{ROOT_IMAGEN}/metrics_por_maquina.html"
)


✅ Gráfico guardado como HTML en: ../images/raza_junio/metrics_por_maquina.html
