In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import LabelEncoder
import pickle

# ===== Read the training data =====
# Point this at the real CSV location before running.
df = pd.read_csv("products_impact.csv")

# ===== Select model inputs and prediction targets =====
# The first four feature columns are categorical and get integer-encoded
# below; the remaining four are passed to the model unchanged.
features = [
    'category', 'material', 'packaging_type', 'transport_mode',
    'distance_km', 'weight_kg', 'energy_kwh', 'water_process_l'
]
targets = ['carbon_kg', 'water_liters']

# Work on copies so the encoding step does not modify `df` itself.
X = df.loc[:, features].copy()
y = df.loc[:, targets].copy()

# ===== Integer-encode the categorical inputs =====
# One LabelEncoder per column, keyed by column name. Each encoder is fitted
# on its own column and the column is replaced by the resulting codes.
categorical_columns = ['category', 'material', 'packaging_type', 'transport_mode']
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    X[name] = encoder.fit_transform(X[name])

# ===== Fit one random forest per target =====
# MultiOutputRegressor clones `base_model` for each column of `y`;
# random_state is fixed so retraining on the same data is repeatable.
base_model = RandomForestRegressor(n_estimators=200, random_state=42)
model = MultiOutputRegressor(base_model).fit(X, y)

# ===== Persist the fitted model and the encoders =====
# The model is written first, then the encoders, each to its own pickle file.
artifacts = (
    ("rf_model.pkl", model),
    ("label_encoders.pkl", label_encoders),
)
for path, artifact in artifacts:
    with open(path, "wb") as out_file:
        pickle.dump(artifact, out_file)

print("Model retrained and saved as rf_model.pkl")


Model retrained and saved as rf_model.pkl
