In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Read the experiment workbook into a DataFrame.
path = "Datos_experimentos_lenguado.xlsx"
df = pd.read_excel(io=path)

# Sanity check: show the frame's dimensions and its column labels.
(df.shape, df.columns)


((1250, 9),
 Index(['Peso (g)', 'Longitud_REAL (mm)', 'Anchura_REAL (mm)', 'Longitud_px',
        'Anchura_px', 'Longitud_AV (mm)', 'Anchura_AV (mm)',
        'Error_longitud_% (AV-REAL)', 'Error_anchura_% (AV-REAL)'],
       dtype='object'))

In [6]:
# Labels of the weight column and of the `_REAL` length / width columns,
# i.e. the three variables used by the regression further down.
COL_PESO, COL_L, COL_A = (
    "Peso (g)",
    "Longitud_REAL (mm)",
    "Anchura_REAL (mm)",
)


In [7]:
# Row filter: keep a row only when weight, length and width are all present
# and strictly positive, so that their logarithms are defined.
mask = (
    df[[COL_PESO, COL_L, COL_A]]
    .pipe(lambda measures: measures.notna() & measures.gt(0))
    .all(axis=1)
)

# Number of rows that pass the filter next to the total row count.
(mask.sum(), len(df))



(np.int64(1250), 1250)

In [8]:
# Temporary log-transformed columns (target name -> source column).
# Each one starts as NaN and is filled only on the rows selected by `mask`,
# so rows outside the mask stay NaN.
log_sources = {
    "_log_peso": COL_PESO,
    "_log_L": COL_L,
    "_log_A": COL_A,
}
for log_col, raw_col in log_sources.items():
    df[log_col] = np.nan
    df.loc[mask, log_col] = np.log(df.loc[mask, raw_col])



In [9]:
# Design matrix: a constant term plus the two log-transformed size columns,
# restricted to the rows selected by `mask`. The response is the log-weight.
X = sm.add_constant(df.loc[mask, ["_log_L", "_log_A"]])
y = df.loc[mask, "_log_peso"]

# Ordinary least squares fit of log(weight) on log(length) and log(width).
model = sm.OLS(y, X).fit()
print(model.summary())

# Intercept and the two slopes, read from the fitted parameters by label.
b0, b1, b2 = (model.params[term] for term in ("const", "_log_L", "_log_A"))

# Mean of the exponentiated residuals; applied below as a multiplicative
# correction when converting log-scale predictions back to grams.
smearing = np.exp(model.resid).mean()

print(f"""
Modelo final:
Peso(g) = exp({b0:.6f}) * L^{b1:.6f} * A^{b2:.6f} * {smearing:.6f}
""")



                            OLS Regression Results                            
Dep. Variable:              _log_peso   R-squared:                       0.967
Model:                            OLS   Adj. R-squared:                  0.967
Method:                 Least Squares   F-statistic:                 1.841e+04
Date:                Tue, 20 Jan 2026   Prob (F-statistic):               0.00
Time:                        09:44:20   Log-Likelihood:                 885.16
No. Observations:                1250   AIC:                            -1764.
Df Residuals:                    1247   BIC:                            -1749.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -10.2194      0.086   -118.161      0.0

In [15]:
# Create the new output columns as NaN (every original column is kept).
for new_col in ("Peso_inferido_g", "Peso_inferido_g_smear"):
    df[new_col] = np.nan

# Predict on the log scale for the valid rows only (X was built from `mask`),
# then back-transform once and reuse the result for both columns.
log_pred = model.predict(X)
peso_pred = np.exp(log_pred)

# Plain back-transform, and the same values scaled by the smearing factor.
df.loc[mask, "Peso_inferido_g"] = peso_pred
df.loc[mask, "Peso_inferido_g_smear"] = peso_pred * smearing



In [16]:
# Remove the temporary log columns before exporting.
# errors="ignore" keeps this cell idempotent: once the columns are gone,
# re-running the cell is a no-op instead of raising KeyError.
df.drop(
    columns=["_log_peso", "_log_L", "_log_A"],
    inplace=True,
    errors="ignore",
)

In [1]:
# Write the augmented table to a new workbook, leaving the row index out.
out_path = "Datos_experimentos_lenguado_final.xlsx"
df.to_excel(excel_writer=out_path, index=False)

# Show the destination path as the cell's output.
out_path


NameError: name 'df' is not defined