In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import lightgbm as lgb
import xgboost as xgb
import joblib
import os

In [5]:
# Load synthetic data
df = pd.read_csv("../data/sales_data.csv", parse_dates=['date'])


In [6]:
# Feature engineering
df['moving_avg_demand'] = df.groupby('product_id')['units_sold'].transform(lambda x: x.rolling(7, min_periods=1).mean())
df['price_elasticity'] = (df['units_sold'].pct_change() / df['historical_price'].pct_change()).fillna(0)
df['trend_factor'] = df.groupby('product_id')['units_sold'].transform(lambda x: x.rolling(30, min_periods=1).mean())

features = ['units_sold', 'competitor_price', 'stock_level', 'day_of_week', 'holiday_flag',
            'views', 'moving_avg_demand', 'price_elasticity', 'trend_factor']
target = 'historical_price'

In [7]:
X = df[features]
y = df[target]

In [8]:

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
lgb_model = lgb.LGBMRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=7,
    random_state=42
)

# Use callbacks for early stopping and logging
lgb_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=50)
    ]
)

joblib.dump(lgb_model, '../models/lgb_model.pkl')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002595 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1142
[LightGBM] [Info] Number of data points in the train set: 146400, number of used features: 9
[LightGBM] [Info] Start training from score 275.740827
Training until validation scores don't improve for 50 rounds
[50]	valid_0's l2: 194.668
[100]	valid_0's l2: 98.0287
[150]	valid_0's l2: 97.3913
Early stopping, best iteration is:
[142]	valid_0's l2: 97.3865


['../models/lgb_model.pkl']

In [14]:
import xgboost as xgb
import joblib

xgb_model = xgb.XGBRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=7,
    random_state=42
)

# Use early_stopping_rounds and verbose instead of callbacks
xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=50,
    verbose=50
)

joblib.dump(xgb_model, '../models/xgb_model.pkl')



TypeError: XGBModel.fit() got an unexpected keyword argument 'early_stopping_rounds'

In [None]:


# Load synthetic data
df = pd.read_csv("../data/sales_data.csv", parse_dates=['date'])

# Feature engineering
df['moving_avg_demand'] = df.groupby('product_id')['units_sold'].transform(lambda x: x.rolling(7, min_periods=1).mean())
df['price_elasticity'] = (df['units_sold'].pct_change() / df['historical_price'].pct_change()).fillna(0)
df['trend_factor'] = df.groupby('product_id')['units_sold'].transform(lambda x: x.rolling(30, min_periods=1).mean())

features = ['units_sold', 'competitor_price', 'stock_level', 'day_of_week', 'holiday_flag',
            'views', 'moving_avg_demand', 'price_elasticity', 'trend_factor']
target = 'historical_price'

X = df[features]
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------- LightGBM --------------------
lgb_model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05, max_depth=7, random_state=42)
lgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=50)
joblib.dump(lgb_model, '../models/lgb_model.pkl')

# -------------------- XGBoost --------------------
xgb_model = xgb.XGBRegressor(n_estimators=1000, learning_rate=0.05, max_depth=7, random_state=42)
xgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=50)
joblib.dump(xgb_model, '../models/xgb_model.pkl')

# -------------------- Evaluate --------------------
def evaluate_model(y_true, y_pred, model_name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    print(f"{model_name} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")
    return mae, rmse

evaluate_model(y_test, lgb_model.predict(X_test), "LightGBM")
evaluate_model(y_test, xgb_model.predict(X_test), "XGBoost")


TypeError: LGBMRegressor.fit() got an unexpected keyword argument 'early_stopping_rounds'