In [49]:
# !pip install pandas numpy matplotlib scipy statsmodels scikit-learn openpyxl

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats
import statsmodels.api as sm
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

PATH = "Dataset_validacion.xlsx"
df = pd.read_excel(PATH)

# Renombrado robusto (ajusta si tu Excel tiene nombres distintos)
rename_map = {
    "Peso (g)": "peso_g",
    "Longitud_real (mm)": "L_real",
    "Anchura_real (mm)": "A_real",
    "Longitud_AV (mm)": "L_av",
    "Anchura_AV (mm)": "A_av",
    "Experimento": "exp",
    "Error_longitud_% (AV-REAL)": "errL_pct",
    "Error_anchura_% (AV-REAL)": "errA_pct",
}
df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})

# Comprobación de columnas necesarias
required = ["peso_g", "L_real", "A_real", "L_av", "A_av", "exp"]
missing = [c for c in required if c not in df.columns]
if missing:
    raise ValueError(f"Faltan columnas requeridas: {missing}\nColumnas disponibles: {list(df.columns)}")

# Filtrado mínimo (evita log(0) y valores no físicos)
df = df.dropna(subset=required).copy()
df = df[(df["peso_g"] > 0) & (df["L_real"] > 0) & (df["A_real"] > 0) & (df["L_av"] > 0) & (df["A_av"] > 0)]

df["exp"] = df["exp"].astype(int)
print(df.shape)
display(df.head())

(1250, 10)


Unnamed: 0,peso_g,L_real,A_real,Longitud_px,Anchura_px,L_av,A_av,errL_pct,errA_pct,exp
0,0.46,33,13,302,118,33.3,13.01,0.909091,0.076923,1
1,0.46,33,13,299,118,32.96,13.01,-0.121212,0.076923,5
2,0.46,33,13,299,118,32.96,13.01,-0.121212,0.076923,3
3,0.67,39,15,351,136,38.7,14.99,-0.769231,-0.066667,3
4,0.82,41,17,375,154,41.34,16.98,0.829268,-0.117647,1


In [50]:
# Variables logarítmicas para ajuste
d = df.copy()
d["ln_peso"] = np.log(d["peso_g"])
d["ln_L"] = np.log(d["L_real"])
d["ln_A"] = np.log(d["A_real"])

X = sm.add_constant(d[["ln_L", "ln_A"]])
y = d["ln_peso"]

ols = sm.OLS(y, X).fit()
print(ols.summary())

ln_a = ols.params["const"]
b = ols.params["ln_L"]
c = ols.params["ln_A"]
a = float(np.exp(ln_a))

print("\nParámetros alométricos (ajuste con REALES):")
print(f"a = {a:.6e}")
print(f"b = {b:.6f}")
print(f"c = {c:.6f}")

                            OLS Regression Results                            
Dep. Variable:                ln_peso   R-squared:                       0.967
Model:                            OLS   Adj. R-squared:                  0.967
Method:                 Least Squares   F-statistic:                 1.841e+04
Date:                Tue, 20 Jan 2026   Prob (F-statistic):               0.00
Time:                        13:52:21   Log-Likelihood:                 885.16
No. Observations:                1250   AIC:                            -1764.
Df Residuals:                    1247   BIC:                            -1749.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -10.2194      0.086   -118.161      0.0

In [51]:
resid = ols.resid.values
smearing = float(np.mean(np.exp(resid)))
print("Smearing factor:", smearing)

print("MODELO ALOMÉTRICO FINAL")
print(f"Peso(g) = {a:.6e} · L(mm)^{b:.6f} · A(mm)^{c:.6f}")
print(f"Smearing factor = {smearing:.6f}")

display(ols.summary())
display(np.exp(ols.conf_int()).rename(columns={0:"IC_low",1:"IC_high"}))

Smearing factor: 1.0070803626203455
MODELO ALOMÉTRICO FINAL
Peso(g) = 3.645743e-05 · L(mm)^1.997086 · A(mm)^0.968498
Smearing factor = 1.007080


0,1,2,3
Dep. Variable:,ln_peso,R-squared:,0.967
Model:,OLS,Adj. R-squared:,0.967
Method:,Least Squares,F-statistic:,18410.0
Date:,"Tue, 20 Jan 2026",Prob (F-statistic):,0.0
Time:,13:52:25,Log-Likelihood:,885.16
No. Observations:,1250,AIC:,-1764.0
Df Residuals:,1247,BIC:,-1749.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-10.2194,0.086,-118.161,0.000,-10.389,-10.050
ln_L,1.9971,0.045,44.190,0.000,1.908,2.086
ln_A,0.9685,0.039,25.153,0.000,0.893,1.044

0,1,2,3
Omnibus:,11.281,Durbin-Watson:,1.861
Prob(Omnibus):,0.004,Jarque-Bera (JB):,12.256
Skew:,-0.177,Prob(JB):,0.00218
Kurtosis:,3.332,Cond. No.,161.0


Unnamed: 0,IC_low,IC_high
const,3.1e-05,4.3e-05
ln_L,6.742446,8.050623
ln_A,2.44234,2.84067


In [54]:
def predict_allometric(L, A, a, b, c, smearing=1.0):
    L = np.asarray(L, dtype=float)
    A = np.asarray(A, dtype=float)
    return (a * (L**b) * (A**c)) * smearing

df["peso_pred"] = predict_allometric(df["L_av"], df["A_av"], a, b, c, smearing=smearing)

# Errores respecto a peso real
df["err_abs_peso"] = df["peso_pred"] - df["peso_g"]
df["err_rel_peso"] = 100.0 * df["err_abs_peso"] / df["peso_g"]

display(df[["exp", "peso_g", "peso_pred", "err_abs_peso", "err_rel_peso"]].head())


Unnamed: 0,exp,peso_g,peso_pred,err_abs_peso,err_rel_peso
0,1,0.46,0.483591,0.023591,5.12843
1,5,0.46,0.47378,0.01378,2.995702
2,3,0.46,0.47378,0.01378,2.995702
3,3,0.67,0.748872,0.078872,11.771917
4,1,0.82,0.963993,0.143993,17.560066


In [55]:
df.to_excel('Dataset_validacion_final.xlsx', index=False)