# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

# Train a random-forest demand model on the simulated sales data and persist
# both the fitted model and the fitted one-hot encoder for later use.

# Load data
df = pd.read_csv('../../data/simulated_sales_data.csv')

# One-hot encode categorical columns
categoricals = ['sku', 'region', 'weather', 'event']
# Request a dense array so the result can be wrapped directly in a DataFrame.
# The keyword for this was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old spelling was removed in 1.4, so try the
# current name first and fall back for older installations.
# NOTE(review): with default settings the encoder raises on categories it did
# not see during fit — confirm whoever loads encoder.pkl only sends known values.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded = encoder.fit_transform(df[categoricals])
# Reuse df's index so the column-wise concat below lines up row-for-row.
encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(categoricals),
    index=df.index,
)

# Combine features: one-hot columns first, then the numeric buzz_score column.
X = pd.concat([encoded_df, df[['buzz_score']]], axis=1)
y = df['units_sold']

# Train-test split: first 80% of rows train, remaining rows test, in file
# order (no shuffling).
# NOTE(review): this is only a sensible hold-out if the CSV row order is
# meaningful (e.g. chronological) — confirm against how the data is generated.
split_idx = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

# Train model (fixed random_state keeps the fitted forest reproducible).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out rows.
preds = model.predict(X_test)
mae = mean_absolute_error(y_test, preds)
print(f"MAE: {mae:.2f}")

# Save model & encoder. Both are needed together at inference time: the
# encoder reproduces the feature columns the model was trained on.
joblib.dump(model, '../../backend/model/demand_model.pkl')
joblib.dump(encoder, '../../backend/model/encoder.pkl')