In [3]:
# train.py
import pandas as pd
import joblib
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

def train_model(data_path=r"C:\Users\Adan\Downloads\motor_second.csv", target="price", save_path="model.pkl"):
    df = pd.read_csv(data_path)

    # Pisah kolom
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    if target in numeric_cols:
        numeric_cols.remove(target)

    categorical_cols = df.select_dtypes(include=['object']).columns.tolist()

    X = df[numeric_cols + categorical_cols]
    y = df[target]

    # Pipeline preprocessing
    preprocessor = ColumnTransformer([
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    ])

    model = RandomForestRegressor(
        n_estimators=300,
        random_state=42
    )

    pipeline = Pipeline([
        ("prep", preprocessor),
        ("model", model)
    ])

    pipeline.fit(X, y)

    # save model
    joblib.dump(pipeline, save_path)

    metadata = {
        "numeric_cols": numeric_cols,
        "categorical_cols": categorical_cols,
        "target": target
    }
    with open("metadata.json", "w") as f:
        json.dump(metadata, f, indent=2)

    print("Training selesai!")
    print("Model disimpan ke:", save_path)
    print("Metadata disimpan ke: metadata.json")


if __name__ == "__main__":
    train_model()


KeyError: 'price'

In [None]:
r"C:\Users\Adan\Downloads\motor_second.csv"