# Shefu — YOLOv8 + Regresión Implícita (imagen → nota; criterios aprendidos internamente)

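Este cuaderno entrena un modelo **de una sola salida (nota 0–100)** que aprende internamente los 4 criterios visuales mediante una **rama auxiliar** cuya pérdida (supervisada con las etiquetas de los criterios) se usa solo en entrenamiento; en inferencia el modelo exportado únicamente expone la nota.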

**Pipeline:** YOLOv8 → crop → red con pérdida auxiliar → salida nota.


In [None]:
# !pip install ultralytics==8.2.103 tensorflow==2.15.0 pandas==2.1.4 numpy==1.26.4 scikit-learn==1.3.2 matplotlib==3.8.0
from ultralytics import YOLO
import os, json, ast
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV3Small
from sklearn.model_selection import train_test_split
import cv2
import matplotlib.pyplot as plt
print('TensorFlow:', tf.__version__)


In [None]:
# Annotation CSV; later cells read its 'score', 'motivo' and 'data' columns.
DATA_CSV_PATH = '/mnt/data/project-2-at-2025-09-12-06-46-b36c7e7f.csv'
# Directory that holds the original images referenced by the CSV.
IMAGES_ROOT  = '/mnt/data/images'
# Weights of the trained YOLO detector.
# NOTE(review): placeholder Windows path. Because this is a raw string, every
# doubled backslash is kept literally as two characters — confirm that is
# intended before pointing it at a real file.
YOLO_WEIGHTS = r'C:\\ruta\\a\\tu\\runs_detector1\\rf_completos22\\weights\\best.pt'
# Directory where the detector crops are written and reused.
CROPS_DIR    = '/mnt/data/crops_yolo'
IMG_SIZE     = 224   # side length (pixels) of the square network input
BATCH_SIZE   = 16
EPOCHS       = 25
VAL_SPLIT    = 0.2   # fraction of rows held out for validation
RANDOM_STATE = 42    # seed for the train/validation split
# Make sure the crop directory exists before any crop is written.
os.makedirs(CROPS_DIR, exist_ok=True)
print('CSV:', DATA_CSV_PATH)


In [None]:
def _extract_score(data):
    """Return the score held by an already-parsed value, or ``None``."""
    # bool is a subclass of int but is never a valid score.
    if isinstance(data, bool):
        return None
    if isinstance(data, (int, float, np.integer, np.floating)):
        value = float(data)
        return value if np.isfinite(value) else None
    if isinstance(data, (list, tuple)) and data:
        first = data[0]
        # Require a mapping: ``'number' in first`` on a string would be a
        # substring test and the subsequent lookup would fail.
        if isinstance(first, dict) and 'number' in first:
            try:
                value = float(first['number'])
            except (TypeError, ValueError):
                return None
            return value if np.isfinite(value) else None
    return None


def parse_score_field(x):
    """Extract the numeric score from a raw ``score`` cell.

    Supported inputs:

    * a plain number, or a string containing one (``85``, ``"85"``);
    * a list whose first element is a mapping with a ``'number'`` key,
      either already parsed or serialized as JSON or as a Python literal
      (``'[{"number": 85}]'`` / ``"[{'number': 85}]"``).

    Args:
        x: Raw cell value.

    Returns:
        The score as ``float``, or ``np.nan`` when the cell is missing or no
        finite score can be extracted from it.
    """
    if not isinstance(x, (str, bytes)):
        # Missing values (None / NaN), plain numbers and already-parsed lists.
        value = _extract_score(x)
        return np.nan if value is None else value

    # Try strict JSON first, then fall back to Python-literal syntax
    # (single-quoted strings are not valid JSON).
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(x)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        value = _extract_score(parsed)
        if value is not None:
            return value
    return np.nan

def parse_motivo_to_flags(motivo):
    """Turn a raw ``motivo`` cell into four 0/1 criterion flags.

    The cell may be free text or a serialized mapping with a ``'choices'``
    entry (JSON or Python-literal syntax). Each candidate string is
    lower-cased and searched for the known criterion phrases.

    Args:
        motivo: Raw cell value; missing values yield all-zero flags.

    Returns:
        Dict with the keys ``pan_quemado``, ``falta_ingrediente``,
        ``desordenado`` and ``buen_balance_visual``, each 0 or 1.
    """
    # (phrase searched in the text, flag it switches on)
    phrase_to_flag = (
        ('pan quemado', 'pan_quemado'),
        ('falta de ingrediente', 'falta_ingrediente'),
        ('desordenado', 'desordenado'),
        ('buen balance visual', 'buen_balance_visual'),
    )
    flags = {flag: 0 for _, flag in phrase_to_flag}
    if pd.isna(motivo):
        return flags

    text = str(motivo)
    selected = None
    if '"choices"' in text or "'choices'" in text:
        parsed = None
        # JSON first, Python literal as a fallback; give up silently if
        # neither parser accepts the text.
        for loader in (json.loads, ast.literal_eval):
            try:
                parsed = loader(text)
            except Exception:
                continue
            else:
                break
        if isinstance(parsed, dict) and 'choices' in parsed:
            selected = parsed['choices']

    # Without a usable choices entry, scan the whole raw text instead.
    if selected is None:
        candidates = [text]
    else:
        candidates = [str(choice) for choice in selected]

    for candidate in candidates:
        lowered = candidate.lower()
        for phrase, flag in phrase_to_flag:
            if phrase in lowered:
                flags[flag] = 1
    return flags


In [None]:
# Load the detector once at cell level; yolo_crop_to_file reuses this instance.
detector = YOLO(YOLO_WEIGHTS)
def yolo_crop_to_file(img_path, save_dir=CROPS_DIR, conf=0.25):
    """Crop the first detected box from an image and save it in ``save_dir``.

    The crop is stored under the same base file name as the input image. If a
    file with that name already exists in ``save_dir`` it is reused and the
    detector is not run.

    Args:
        img_path: Path of the image to process.
        save_dir: Directory where the crop is written.
        conf: Confidence threshold passed to the detector.

    Returns:
        Path of the crop file, or ``None`` when the image cannot be read, no
        box is detected, the clipped box is empty, or the crop cannot be
        written.
    """
    crop_path = os.path.join(save_dir, os.path.basename(img_path))
    if os.path.exists(crop_path):
        return crop_path

    # Check readability first: no point running the detector on an image
    # that cannot be cropped afterwards.
    img = cv2.imread(img_path)
    if img is None:
        return None

    results = detector(img_path, conf=conf, verbose=False)
    if len(results) == 0:
        return None
    boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
    if boxes.shape[0] == 0:
        return None

    # Only the first box returned by the detector is used.
    x1, y1, x2, y2 = boxes[0]
    h, w = img.shape[:2]
    # Clip the box to the image bounds before slicing.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    crop = img[y1:y2, x1:x2]
    if crop.size == 0:
        return None

    os.makedirs(save_dir, exist_ok=True)
    # cv2.imwrite reports failure through its return value; do not hand back
    # a path to a file that was never written.
    if not cv2.imwrite(crop_path, crop):
        return None
    return crop_path


In [None]:
# Read the annotation CSV and derive the numeric score plus one 0/1 column
# per visual criterion.
df = pd.read_csv(DATA_CSV_PATH)
df['score_clean'] = df['score'].apply(parse_score_field)
flags_series = df['motivo'].apply(parse_motivo_to_flags)
for flag_name in ('pan_quemado', 'falta_ingrediente', 'desordenado', 'buen_balance_visual'):
    df[flag_name] = [row_flags[flag_name] for row_flags in flags_series]


def resolve_original_path(p):
    """Map a CSV ``data`` value to a file under ``IMAGES_ROOT`` (``None`` if missing).

    Only the base file name of the value is kept.
    """
    if pd.isna(p):
        return None
    file_name = os.path.basename(str(p))
    return os.path.join(IMAGES_ROOT, file_name)


df['orig_path'] = df['data'].apply(resolve_original_path)

# One crop per row; rows whose original image is unavailable get None.
crop_paths = [
    yolo_crop_to_file(orig, save_dir=CROPS_DIR, conf=0.25)
    if orig is not None and os.path.exists(orig)
    else None
    for orig in df['orig_path'].tolist()
]
df['crop_path'] = crop_paths

# Keep only rows that have both a parsed score and a crop present on disk.
df = df.dropna(subset=['score_clean', 'crop_path']).reset_index(drop=True)
crop_on_disk = df['crop_path'].apply(os.path.exists)
df = df[crop_on_disk].reset_index(drop=True)
print('Total con crop válido:', len(df))


In [None]:
from sklearn.model_selection import train_test_split
# NOTE(review): the four constants below repeat the values assigned in the
# configuration cell; keep both places in sync if any of them changes.
IMG_SIZE=224
BATCH_SIZE=16
VAL_SPLIT=0.2
RANDOM_STATE=42
# Random, seeded train/validation split over the rows that have a valid crop.
train_df, val_df = train_test_split(df, test_size=VAL_SPLIT, random_state=RANDOM_STATE, shuffle=True)
# Binary criterion columns used as targets of the auxiliary 'flags' output.
BIN_COLS = ['pan_quemado','falta_ingrediente','desordenado','buen_balance_visual']
def load_and_resize(p):
    """Read an image file and return it as a float32 tensor scaled to [0, 1].

    Args:
        p: Path of the image file.

    Returns:
        Tensor of shape ``(IMG_SIZE, IMG_SIZE, 3)``.
    """
    raw_bytes = tf.io.read_file(p)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (IMG_SIZE, IMG_SIZE))
    return tf.cast(resized, tf.float32) / 255.0
def augment(img):
    """Apply light random augmentation to one training image.

    Random horizontal flip, brightness shift (max delta 0.1) and contrast
    scaling (factor in [0.9, 1.1]).

    Args:
        img: Float image tensor with values in [0, 1].

    Returns:
        The augmented image, clipped back to [0, 1].
    """
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, 0.1)
    img = tf.image.random_contrast(img, 0.9, 1.1)
    # Brightness/contrast jitter can push pixels outside [0, 1]; validation
    # and inference images never leave that range, so clip to match them.
    return tf.clip_by_value(img, 0.0, 1.0)
def make_ds(frame, bs, training=True):
    """Build a batched ``tf.data`` pipeline of ``(image, targets)`` pairs.

    Args:
        frame: DataFrame with a ``crop_path`` column, a ``score_clean``
            column and the binary columns listed in ``BIN_COLS``.
        bs: Batch size.
        training: When true, samples are reshuffled every epoch and
            ``augment`` is applied to each image.

    Returns:
        Dataset yielding ``(img, {'score': s, 'flags': f})`` batches, where
        ``s`` is ``score_clean / 100`` and ``f`` holds the ``BIN_COLS``
        values, both as float32.
    """
    paths = frame['crop_path'].values
    scores = (frame['score_clean'].values / 100.0).astype(np.float32)
    flag_rows = frame[BIN_COLS].values.astype(np.float32)

    ds = tf.data.Dataset.from_tensor_slices((paths, scores, flag_rows))
    if training:
        # Shuffle the lightweight (path, labels) records *before* decoding so
        # the shuffle buffer never has to hold every decoded image in memory.
        ds = ds.shuffle(buffer_size=max(len(frame), 1), reshuffle_each_iteration=True)

    def _load_example(path, score, flag_row):
        """Decode one image (augmenting it in training) and pair it with its targets."""
        img = load_and_resize(path)
        if training:
            img = augment(img)
        return img, {'score': score, 'flags': flag_row}

    ds = ds.map(_load_example, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(bs).prefetch(tf.data.AUTOTUNE)
# Use the configured batch size instead of a repeated literal so changing
# BATCH_SIZE actually affects the datasets.
train_ds = make_ds(train_df, BATCH_SIZE, True)
val_ds   = make_ds(val_df, BATCH_SIZE, False)


In [None]:
# Two-output network: a frozen MobileNetV3Small backbone, an auxiliary head
# that predicts the four criterion flags, and a score head that consumes both
# the shared features and the predicted flags.
inp = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3), name='img')

# The data pipeline and the inference code feed pixels scaled to [0, 1].
# Keras' MobileNetV3 with its default built-in preprocessing expects [0, 255]
# and would squash [0, 1] inputs into a nearly constant signal. Disable the
# built-in preprocessing and map [0, 1] -> [-1, 1] explicitly, which is the
# range the backbone expects in that mode.
scaled = layers.Rescaling(scale=2.0, offset=-1.0, name='to_backbone_range')(inp)
bb = MobileNetV3Small(
    include_top=False,
    weights='imagenet',
    input_tensor=scaled,
    include_preprocessing=False,
)
bb.trainable = False  # first stage trains only the new heads

x = layers.GlobalAveragePooling2D()(bb.output)
x = layers.Dense(128, activation='relu')(x)

# Auxiliary branch: one sigmoid per criterion.
aux = layers.Dense(32, activation='relu')(x)
out_flags = layers.Dense(4, activation='sigmoid', name='flags')(aux)

# Score branch: shared features concatenated with the predicted flags.
h = layers.Concatenate()([x, out_flags])
h = layers.Dense(64, activation='relu')(h)
h = layers.Dropout(0.3)(h)
out_score = layers.Dense(1, activation='linear', name='score')(h)

model = models.Model(inputs=inp, outputs=[out_score, out_flags])
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss={'score': 'mse', 'flags': 'binary_crossentropy'},
    loss_weights={'score': 1.0, 'flags': 0.3},
    metrics={'score': ['mae']},
)
model.summary()


In [None]:
# Stage 1: train the new heads on top of the frozen backbone. Both callbacks
# watch the validation loss of the 'score' output only.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_score_loss', patience=5, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_score_loss', factor=0.5, patience=2, min_lr=1e-6, verbose=1),
]
# Use the configured EPOCHS rather than a repeated literal.
h1 = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks)
import matplotlib.pyplot as plt

# Learning curves of the score output (targets are scaled to [0, 1]).
plt.figure()
plt.plot(h1.history['score_loss'])
plt.plot(h1.history['val_score_loss'])
plt.title('MSE nota')
plt.legend(['train', 'val'])
plt.show()

plt.figure()
plt.plot(h1.history['score_mae'])
plt.plot(h1.history['val_score_mae'])
plt.title('MAE nota')
plt.legend(['train', 'val'])
plt.show()


In [None]:
# Stage 2: fine-tune the last N backbone layers with a much smaller learning rate.
N = 20
for layer in bb.layers[-N:]:
    # Keep BatchNormalization layers frozen: with small batches, updating
    # their moving statistics tends to wreck the pretrained features.
    layer.trainable = not isinstance(layer, layers.BatchNormalization)

# Re-compile so the new trainable flags take effect.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss={'score': 'mse', 'flags': 'binary_crossentropy'},
    loss_weights={'score': 1.0, 'flags': 0.3},
    metrics={'score': ['mae']},
)
h2 = model.fit(train_ds, validation_data=val_ds, epochs=10, callbacks=callbacks)

plt.figure()
plt.plot(h2.history['score_loss'])
plt.plot(h2.history['val_score_loss'])
plt.title('MSE nota FT')
plt.legend(['train', 'val'])
plt.show()

plt.figure()
plt.plot(h2.history['score_mae'])
plt.plot(h2.history['val_score_mae'])
plt.title('MAE nota FT')
plt.legend(['train', 'val'])
plt.show()


In [None]:
# Export three artifacts: the full two-output model, a score-only inference
# model, and a TFLite conversion of the score-only model.
OUTPUT_DIR = '/mnt/data'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# 1) Full training model (score + flags outputs).
full_path = os.path.join(OUTPUT_DIR, 'shefu_impl_full_yolo.h5')
model.save(full_path)
print('Guardado full:', full_path)

# 2) Inference model: same input, only the 'score' layer output.
infer_model = tf.keras.Model(
    inputs=model.input,
    outputs=model.get_layer('score').output,
)
infer_path = os.path.join(OUTPUT_DIR, 'shefu_impl_infer_yolo.h5')
infer_model.save(infer_path)
print('Guardado infer:', infer_path)

# 3) TFLite version of the inference model with default optimizations.
converter = tf.lite.TFLiteConverter.from_keras_model(infer_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_bytes = converter.convert()
tflite_path = os.path.join(OUTPUT_DIR, 'shefu_impl_infer_yolo.tflite')
with open(tflite_path, 'wb') as f:
    f.write(tflite_bytes)
print('Guardado TFLite:', tflite_path)


In [None]:
# Reuse the in-memory inference model when this session already built it;
# otherwise load the saved score-only model from disk.
if 'infer_model' not in globals():
    infer_model = tf.keras.models.load_model('/mnt/data/shefu_impl_infer_yolo.h5')
def infer_full_image(image_path, conf=0.25):
    """Estimate the 0-100 score of a full photo.

    The photo is cropped with ``yolo_crop_to_file``, resized to
    ``IMG_SIZE`` x ``IMG_SIZE``, scaled to [0, 1] and passed to
    ``infer_model``; the prediction is clipped to [0, 1] and multiplied by 100.

    Args:
        image_path: Path of the photo to score.
        conf: Confidence threshold forwarded to the detector.

    Returns:
        The estimated score as ``float``, or ``None`` when no crop could be
        obtained.
    """
    import tempfile

    # Crop into a throw-away directory: the crop helper reuses any existing
    # file with the same base name, so writing into the shared crop folder
    # could score a stale crop of a different photo (and would also mix
    # inference crops with the training crops).
    with tempfile.TemporaryDirectory() as tmp_dir:
        cp = yolo_crop_to_file(image_path, save_dir=tmp_dir, conf=conf)
        if cp is None or not os.path.exists(cp):
            print('No se pudo obtener crop del completo.')
            return None
        # Read and decode inside the with-block so the file contents are
        # loaded before the temporary directory is removed (assumes eager
        # execution — TODO confirm if this is ever wrapped in tf.function).
        img = tf.image.decode_jpeg(tf.io.read_file(cp), channels=3)

    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.expand_dims(img, 0)  # add the batch dimension
    pred = infer_model.predict(img, verbose=0)[0, 0]
    score = float(np.clip(pred, 0, 1) * 100.0)
    print(f'Nota estimada: {score:.1f}/100')
    return score
# Example: infer_full_image(r'C:\path\to\a\photo.jpg')
