# Ogum ML Lite — Pipeline de ML (Fase 3)
Este notebook sintetiza dados de sinterização, gera features via CLI, treina um classificador com validação GroupKFold e, por fim, gera predições com o modelo salvo.

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

rng = np.random.default_rng(42)
records = []
sample_ids = [f'S{i:02d}' for i in range(6)]
techniques = ['Conventional', 'UHS', 'FS', 'SPS', 'Hybrid', 'FAST']
for idx, (sample_id, tech) in enumerate(zip(sample_ids, techniques, strict=True)):
    times = np.linspace(0, 1500, 50)
    base_temp = 640 + 18 * idx
    rate = 0.22 + 0.04 * idx
    temps = base_temp + rate * times
    dens_curve = 0.18 + 0.78 / (1 + np.exp(-(times - 800) / 110))
    noise = rng.normal(scale=0.015, size=times.size)
    dens = np.clip(dens_curve + 0.02 * idx + noise, 0, 0.995)
    for t, temp, rho in zip(times, temps, dens, strict=True):
        records.append({
            'sample_id': sample_id,
            'time_s': float(t),
            'temp_C': float(temp),
            'rho_rel': float(rho),
            'technique': tech,
        })
df_long = pd.DataFrame(records)
df_long.to_csv('ensaios_demo.csv', index=False)
df_long.head()

In [None]:
!python -m ogum_lite.cli ml features --input ensaios_demo.csv --ea "200,300,400" --output demo_features.csv
features = pd.read_csv('demo_features.csv')
targets = df_long[['sample_id', 'technique']].drop_duplicates()
features_ml = features.merge(targets, on='sample_id', how='left')
features_ml.to_csv('demo_features_ml.csv', index=False)
features_ml.head()

In [None]:
# Train the classifier through the ogum_lite CLI: `technique` is the target
# column, `sample_id` is passed as the grouping column, and the six listed
# feature columns are taken from demo_features_ml.csv. Artifacts are written
# under artifacts_demo/cls_technique.
# NOTE(review): the synthetic data pairs each technique with exactly one
# sample_id, so any fold that holds out whole groups tests on a class that is
# absent from training — confirm this is acceptable for the demo.
# NOTE(review): the feature step was run with --ea "200,300,400" but only the
# 200 and 300 theta columns are listed here — presumably deliberate; confirm.
!python -m ogum_lite.cli ml train-cls \
    --table demo_features_ml.csv \
    --target technique \
    --group-col sample_id \
    --features heating_rate_med_C_per_s T_max_C y_final t_to_90pct_s theta_Ea_200kJ theta_Ea_300kJ \
    --outdir artifacts_demo/cls_technique

In [None]:
# Apply the saved classifier to the feature table and write the result to
# demo_preds.csv.
# NOTE(review): this is the same table that was passed to train-cls, so the
# preview below is a smoke test rather than a held-out evaluation.
!python -m ogum_lite.cli ml predict --table demo_features_ml.csv --model artifacts_demo/cls_technique/classifier.joblib --out demo_preds.csv
# Show the first rows of the predictions file.
pd.read_csv('demo_preds.csv').head()