In [38]:
import pandas as pd
import numpy as np
import sys
import os

import plotly.graph_objects as go
from sklearn.metrics import r2_score

from datetime import datetime, time
sys.path.append(os.path.abspath(".."))
from core.viz import plot_line, create_subplot_grid, plot_bar, plot_statistical_strip, plot_scatter, plot_distribution
from core.s3 import S3AssetManager

In [39]:
# Identifier handed to S3AssetManager (presumably used to namespace the stored
# assets of this notebook -- confirm in core.s3).
notebook_name = "okuo_sensores_ph_orp_granja_experimental"
# Asset manager used by the later cells to persist figures via save_plotly_html.
s3 = S3AssetManager(notebook_name=notebook_name)

In [43]:

# Time-of-day bucketing used to compare measurements taken at similar hours.
def clasificar_franja_horaria(fecha):
    """Return the label of the time-of-day band that ``fecha`` falls in.

    Only ``fecha.hour`` is inspected. Each band is half-open: the start hour
    is included and the end hour is excluded (e.g. 09:59 belongs to the first
    band, 10:00 to the second).

    Args:
        fecha: Any object exposing an ``hour`` attribute (e.g. a timestamp).

    Returns:
        One of the three band labels, or ``"Resto del día"`` when the hour is
        outside 06:00-15:00.
    """
    # (first hour included, first hour excluded, label)
    franjas = (
        (6, 10, "06:00 AM - 10:00 AM"),
        (10, 12, "10:00 AM - 12:00 PM"),
        (12, 15, "12:00 PM - 03:00 PM"),
    )

    hora_del_dia = fecha.hour
    for inicio, fin, etiqueta in franjas:
        if inicio <= hora_del_dia < fin:
            return etiqueta

    # Early morning, late afternoon and night all share one bucket.
    return "Resto del día"



In [44]:
# Load the raw pH/ORP follow-up workbook (path is relative to the notebook's folder).
df = pd.read_excel("../raw/SEGUIMIENTO ORP GRANJA EXPERIMENTAL.xlsx",
)
# Normalise headers (trim surrounding whitespace, lower-case) so later cells can
# address columns by a stable name such as 'ph granja'.
df.columns = df.columns.str.strip().str.lower()

In [45]:
# Measurement columns that must be numeric for the analysis below.
cls = [
    'ph granja',
    'orp granja',
    'ph telemetría',
    'orp telemetría',
    'diferencia ph',
    'diferencia orp',
]

# Coerce every measurement column at once; unparseable cells become NaN.
df[cls] = df[cls].apply(pd.to_numeric, errors='coerce')

# Build a single timestamp from the separate date ('fecha') and time ('hora') columns.
fecha_hora_texto = df['fecha'].astype(str) + ' ' + df['hora'].astype(str)
df['date'] = pd.to_datetime(fecha_hora_texto)
df

Unnamed: 0,fecha,hora,ph granja,orp granja,ph telemetría,orp telemetría,diferencia ph,diferencia orp,date
0,2025-11-14,09:23:00,4.95,801,4.85,857,-0.1,56,2025-11-14 09:23:00
1,2025-11-14,13:59:00,5.8,751,5.62,832,-0.18,81,2025-11-14 13:59:00
2,2025-11-15,10:00:00,5.8,773,5.93,808,0.13,35,2025-11-15 10:00:00
3,2025-11-15,14:49:00,6.05,711,6.43,780,0.38,69,2025-11-15 14:49:00
4,2025-11-17,06:55:00,5.99,640,,779,,139,2025-11-17 06:55:00
5,2025-11-17,10:55:00,6.38,680,,756,,76,2025-11-17 10:55:00
6,2025-11-18,10:13:00,6.2,614,6.69,718,0.49,104,2025-11-18 10:13:00
7,2025-11-21,14:30:00,4.88,606,3.72,712,-1.16,106,2025-11-21 14:30:00
8,2025-11-22,14:58:00,6.35,638,6.42,742,0.07,104,2025-11-22 14:58:00
9,2025-11-24,10:16:00,7.05,633,7.06,750,0.01,117,2025-11-24 10:16:00


In [48]:

# Reshape from wide (one row per visit, one column per source) to long
# (one row per visit and source), tagging each row with its source.
df_granja = (
    df[['date', 'ph granja', 'orp granja']]
    .rename(columns={'ph granja': 'ph', 'orp granja': 'orp'})
    .assign(categoria='Granja')
)

df_telemetria = (
    df[['date', 'ph telemetría', 'orp telemetría']]
    .rename(columns={'ph telemetría': 'ph', 'orp telemetría': 'orp'})
    .assign(categoria='Telemetría')
)

# Stack both sources and order chronologically with a fresh 0..n-1 index.
df_transformado = (
    pd.concat([df_granja, df_telemetria], ignore_index=True)
    .sort_values(by='date')
    .reset_index(drop=True)
)

# Label every measurement with its time-of-day band.
df_transformado['franja_horaria'] = df_transformado['date'].apply(clasificar_franja_horaria)

df_transformado

Unnamed: 0,date,ph,orp,categoria,franja_horaria
0,2025-11-14 09:23:00,4.95,801,Granja,06:00 AM - 10:00 AM
1,2025-11-14 09:23:00,4.85,857,Telemetría,06:00 AM - 10:00 AM
2,2025-11-14 13:59:00,5.8,751,Granja,12:00 PM - 03:00 PM
3,2025-11-14 13:59:00,5.62,832,Telemetría,12:00 PM - 03:00 PM
4,2025-11-15 10:00:00,5.8,773,Granja,10:00 AM - 12:00 PM
5,2025-11-15 10:00:00,5.93,808,Telemetría,10:00 AM - 12:00 PM
6,2025-11-15 14:49:00,6.05,711,Granja,12:00 PM - 03:00 PM
7,2025-11-15 14:49:00,6.43,780,Telemetría,12:00 PM - 03:00 PM
8,2025-11-17 06:55:00,5.99,640,Granja,06:00 AM - 10:00 AM
9,2025-11-17 06:55:00,,779,Telemetría,06:00 AM - 10:00 AM


In [55]:
# Median pH/ORP per time band and source. 'measurements' counts non-null
# timestamps, so a row with a missing pH or ORP value is still counted.
by_moment = (
    df_transformado
    .groupby(['franja_horaria', 'categoria'])
    .agg(
        ph=pd.NamedAgg(column='ph', aggfunc='median'),
        orp=pd.NamedAgg(column='orp', aggfunc='median'),
        measurements=pd.NamedAgg(column='date', aggfunc='count'),
    )
    .reset_index()
)
by_moment

Unnamed: 0,franja_horaria,categoria,ph,orp,measurements
0,06:00 AM - 10:00 AM,Granja,5.48,677.5,4
1,06:00 AM - 10:00 AM,Telemetría,4.85,818.0,4
2,10:00 AM - 12:00 PM,Granja,5.8,747.0,7
3,10:00 AM - 12:00 PM,Telemetría,4.93,808.0,7
4,12:00 PM - 03:00 PM,Granja,5.87,713.0,10
5,12:00 PM - 03:00 PM,Telemetría,5.875,800.0,10


In [58]:
# Development aid: evaluating the bare name shows plot_bar's signature as the cell output.
plot_bar

<function core.viz.plot_bar(df: pandas.core.frame.DataFrame, x_col: str, y_col: str, group_col: Optional[str] = None, order_x: Optional[List[str]] = None, order_groups: Optional[List[str]] = None, cat_base: Optional[str] = None, filter_empty: bool = True, compact_mode: bool = False, cluster_width: float = 0.8, bar_width_scale: float = 0.9, barmode: str = 'group', show_delta: bool = False, delta_reference_group: Optional[str] = None, delta_unit: str = '', hover_data_cols: Optional[List[str]] = None, title: str = '', x_title: Optional[str] = None, y_title: Optional[str] = None, text_format: str = '.1f', bar_colors: Union[List[str], Dict[str, str], NoneType] = None, height: int = 500, width: int = 1000, font_family: str = 'Inter, Arial, sans-serif', output_path: Optional[str] = None) -> plotly.graph_objs._figure.Figure>

In [67]:
# Grouped bars of the median pH per time band, one bar per source.
# cat_base='Telemetría' together with show_delta=True presumably annotates the
# difference of each group against the telemetry bars -- confirm in core.viz.
f_ph = plot_bar(
    df=by_moment,
    x_col='franja_horaria',
    y_col='ph',
    group_col='categoria',
    cat_base='Telemetría',
    show_delta=True,
    hover_data_cols=['orp', 'measurements'],
    text_format='.2f',
    x_title='Franja horaria',  # fixed typo: was 'Francia horaria'
    y_title='pH',
)
f_ph.show()
# Persist the interactive figure through the notebook's S3 asset manager.
s3.save_plotly_html(f_ph, 'by_moment_ph.html')

In [68]:
# Grouped bars of the median ORP per time band, one bar per source.
# cat_base='Telemetría' together with show_delta=True presumably annotates the
# difference of each group against the telemetry bars -- confirm in core.viz.
f_orp = plot_bar(
    df=by_moment,
    x_col='franja_horaria',
    y_col='orp',
    group_col='categoria',
    cat_base='Telemetría',
    show_delta=True,
    hover_data_cols=['ph', 'measurements'],
    text_format='.2f',
    x_title='Franja horaria',  # fixed typo: was 'Francia horaria'
    y_title='ORP',
)
f_orp.show()
# Persist the interactive figure through the notebook's S3 asset manager.
s3.save_plotly_html(f_orp, 'by_moment_orp.html')

In [69]:
# Development aid: evaluating the bare name shows create_subplot_grid's signature as the cell output.
create_subplot_grid

<function core.viz.create_subplot_grid(figures: List[plotly.graph_objs._figure.Figure], rows: int, cols: int, titles: Optional[List[str]] = None, shared_x: bool = False, shared_y: bool = False, main_title: str = '', height: int = 800, width: int = 1000, output_path: Optional[str] = None) -> plotly.graph_objs._figure.Figure>

In [78]:
# Combine the per-band pH and ORP bar charts into a 1x2 grid and persist it.
f_complete = create_subplot_grid(
    figures=[f_ph, f_orp],
    rows=1,
    cols=2,
    titles=['pH', 'ORP'],
    shared_x=True,
    shared_y=False,
    main_title='Comportamiento de pH y ORP a lo largo del día (Tomando como referencia mediciones con Telemetría)',
    width=1200,
    height=400,
)
f_complete.show()
s3.save_plotly_html(f_complete, 'by_moment_ph_orp.html')

In [88]:
# pH against measurement timestamp, grouped by source ('categoria').
time_ph = plot_scatter(
    df=df_transformado,
    x_col="date",
    y_col="ph",
    group_col="categoria",
    x_title="Fecha",
    y_title="pH",
    title="pH vs Fecha",
    width=800,
    height=400
)
time_ph.show()
# Persist the interactive figure through the notebook's S3 asset manager.
s3.save_plotly_html(time_ph, 'time_ph.html')


In [89]:
# ORP against measurement timestamp, grouped by source ('categoria').
time_orp = plot_scatter(
    df=df_transformado,
    x_col="date",
    y_col="orp",
    group_col="categoria",
    x_title="Fecha",
    y_title="ORP",
    title="ORP vs Fecha",
    width=800,
    height=400,
)
time_orp.show()
# Persist the interactive figure through the notebook's S3 asset manager.
s3.save_plotly_html(time_orp, 'time_orp.html')

In [91]:
# Combine the pH and ORP time-series scatters into a 1x2 grid and persist it.
f_time_ph_orp = create_subplot_grid(
    figures=[time_ph, time_orp],
    rows=1,
    cols=2,
    titles=['pH', 'ORP'],
    shared_x=True,
    shared_y=False,
    # Fixed grammar in the user-facing title: was 'a lo largo en el tiempo'.
    main_title='Comportamiento de pH y ORP a lo largo del tiempo',
    width=1200,
    height=400,
)

f_time_ph_orp.show()
s3.save_plotly_html(f_time_ph_orp, 'time_ph_orp.html')


In [81]:
# ORP against pH, grouped by source ('categoria'). The returned figure is the
# cell's output; unlike the other figures it is neither assigned nor saved to S3.
plot_scatter(
    df=df_transformado,
    x_col="ph",
    y_col="orp",
    group_col="categoria",
    x_title="pH",
    y_title="ORP",
    title="pH vs ORP",
    width=800,
    height=400,
)

In [82]:


def _signed_term(value: float, spec: str) -> str:
    """Render ``value`` as an explicitly signed term, e.g. ``'+ 12'`` or ``'- 3.5'``.

    ``spec`` is the format specification applied to the absolute value.
    """
    sign = "+" if value >= 0 else "-"
    return f"{sign} {abs(value):{spec}}"


def plot_ph_orp_regressions_complete(
    df: pd.DataFrame,
    title: str = "Correlación pH vs ORP: Ecuaciones y Ajuste",
    width: int = 1000,
    height: int = 600
) -> "go.Figure":
    """Plot ORP against pH per category with linear and quadratic fits.

    For every distinct value of ``df['categoria']`` the rows with both ``ph``
    and ``orp`` present are drawn as markers. A least-squares line is added,
    and a quadratic curve is added as a legend-only (initially hidden) trace
    when there are more than three rows. Each fit's legend entry carries its
    equation and R².

    Categories with fewer than two complete rows are skipped entirely. Fits
    are only computed when the pH values allow a well-determined solution:
    at least 2 distinct pH values for the line and at least 3 for the
    quadratic. Otherwise only the markers are drawn.

    Args:
        df: Frame with the columns ``categoria``, ``ph`` and ``orp``.
        title: Figure title (rendered in bold).
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.

    Returns:
        The assembled Plotly figure.
    """
    fig = go.Figure()

    # Fixed colours for the known categories; anything else falls back to grey.
    colors = {'Granja': '#1C8074', 'Telemetría': '#666666'}

    for cat in df['categoria'].unique():
        d_cat = df[df['categoria'] == cat].dropna(subset=['ph', 'orp'])
        if len(d_cat) < 2:
            continue

        x = d_cat['ph'].to_numpy(dtype=float)
        y = d_cat['orp'].to_numpy(dtype=float)
        color = colors.get(cat, 'grey')
        # Number of distinct pH values decides which fits are well-determined.
        n_distinct_x = np.unique(x).size

        # --- A. Observed points ---
        fig.add_trace(go.Scatter(
            x=x, y=y, mode='markers',
            name=f'{cat} (Datos)',
            marker=dict(color=color, size=9, opacity=0.7, line=dict(width=1, color='black'))
        ))

        # With a single distinct pH the least-squares problem is rank-deficient
        # (np.polyfit would return an arbitrary line), so draw the points only.
        if n_distinct_x < 2:
            continue

        # pH grid spanning the observed range, used to draw the fitted curves.
        x_range = np.linspace(x.min(), x.max(), 100)

        # --- B. Linear regression (y = mx + b) ---
        coef_lin = np.polyfit(x, y, 1)  # [m, b]
        m, b = coef_lin
        poly1 = np.poly1d(coef_lin)
        r2_lin = r2_score(y, poly1(x))

        eq_label_lin = f"y={m:.1f}x {_signed_term(b, '.0f')}"

        fig.add_trace(go.Scatter(
            x=x_range, y=poly1(x_range), mode='lines',
            # Equation and R² are shown directly in the legend entry.
            name=f'{cat} Lin: {eq_label_lin} (R²={r2_lin:.2f})',
            line=dict(color=color, width=3, dash='solid'),
            hovertemplate=f"<b>{cat} Lineal</b><br>pH: %{{x:.2f}}<br>ORP: %{{y:.0f}}<br>Ecuación: {eq_label_lin}"
        ))

        # --- C. Quadratic regression (y = ax² + bx + c) ---
        # Needs more than three rows and at least three distinct pH values,
        # otherwise the quadratic is not uniquely determined.
        if len(d_cat) > 3 and n_distinct_x >= 3:
            coef_poly = np.polyfit(x, y, 2)  # [a, b, c]
            a, b_poly, c_poly = coef_poly
            poly2 = np.poly1d(coef_poly)
            r2_poly = r2_score(y, poly2(x))

            eq_label_poly = (
                f"y={a:.1f}x² {_signed_term(b_poly, '.1f')}x {_signed_term(c_poly, '.0f')}"
            )

            fig.add_trace(go.Scatter(
                x=x_range, y=poly2(x_range), mode='lines',
                name=f'{cat} Poly: {eq_label_poly} (R²={r2_poly:.2f})',
                line=dict(color=color, width=3, dash='dot'),
                visible='legendonly'  # hidden by default to avoid cluttering the plot
            ))

    # --- Styling and margins ---
    fig.update_layout(
        title={'text': f"<b>{title}</b>", 'y': 0.98, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        font=dict(family="Inter, Arial, sans-serif", size=12, color="black"),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",

        # Large top margin (150) leaves room for the long legend entries.
        margin=dict(l=60, r=40, t=150, b=60),

        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
            # entrywidth=0 lets each legend entry size itself to its text.
            entrywidth=0,
            entrywidthmode="pixels"
        ),

        height=height,
        width=width,
        hovermode="x unified"
    )

    # Shared axis look: black axis line and outside ticks over a faint grid.
    axis_style = dict(
        showline=True, linecolor="black", linewidth=2,
        ticks="outside", tickcolor="black", tickfont=dict(color="black"),
        showgrid=True, gridcolor='rgba(0,0,0,0.1)'
    )

    fig.update_xaxes(
        **axis_style,
        title=dict(text="pH", font=dict(color="black", size=14), standoff=15)
    )

    fig.update_yaxes(
        **axis_style,
        zeroline=False,
        title=dict(text="ORP (mV)", font=dict(color="black", size=14), standoff=15)
    )

    return fig


In [92]:
# Build the pH vs ORP regression figure from the long-format measurements,
# display it, and persist it through the notebook's S3 asset manager.
fig = plot_ph_orp_regressions_complete(df_transformado)
fig.show()
s3.save_plotly_html(fig, 'ph_orp_relacion.html')
