In [2]:
# lca_model_extended.ipynb

import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
import pickle

# 1. Read the dataset from the CSV file in the working directory
data = pd.read_csv("lca_dataset.csv")

# 2. Split the frame into model inputs (X) and prediction targets (y)
feature_columns = [
    "material",
    "route",
    "quantity",
    "energy_mwh",
    "transport_km",
    "end_of_life",
    "process_stage1_energy",
    "process_stage2_energy",
    "process_stage3_energy",
]
target_columns = [
    "co2_kg",
    "water_l",
    "waste_kg",
    "recycled_content",
    "resource_efficiency",
    "extended_life",
    "reuse_potential",
]

X = data[feature_columns]
y = data[target_columns]

# 3. Preprocessing
# Columns that are one-hot encoded before reaching the regressor.
categorical_features = ["material", "route", "end_of_life"]
# Columns that are forwarded to the regressor unchanged.
numerical_features = [
    "quantity",
    "energy_mwh",
    "transport_km",
    "process_stage1_energy",
    "process_stage2_energy",
    "process_stage3_energy",
]

# handle_unknown="ignore" makes the encoder emit an all-zero row for a
# category it did not see during fit, instead of raising at transform time.
column_steps = [
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ("num", "passthrough", numerical_features),
]
preprocessor = ColumnTransformer(column_steps)

# 4. Multi-output regression
# MultiOutputRegressor fits one copy of the base estimator per target column;
# random_state is fixed so repeated runs build the same forests.
base_forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", MultiOutputRegressor(base_forest)),
    ]
)

# 5. Fit the full pipeline (preprocessing + regressors) on all rows of X / y
model.fit(X, y)

# 6. Serialize the fitted pipeline to disk with pickle
output_path = "lca_model_extended.pkl"
with open(output_path, "wb") as model_file:
    pickle.dump(model, model_file)

print("Model trained and saved as lca_model_extended.pkl")


Model trained and saved as lca_model_extended.pkl
