In [5]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
import joblib  # Untuk menyimpan LabelEncoders

# Load the raw stock records used for training.
medicine = pd.read_csv("medicine.csv")

# --- Feature engineering ---
medicine["record_timestamp"] = pd.to_datetime(medicine["record_timestamp"])
medicine["stockout_timestamp"] = pd.to_datetime(medicine["stockout_timestamp"])

# Target: whole days elapsed between the record and the stock-out.
medicine["stockout_days"] = (medicine["stockout_timestamp"] - medicine["record_timestamp"]).dt.days

# Keep only rows with a strictly positive target. A missing target compares
# False against 0, so this filter already drops NaN rows; .copy() makes the
# filtered frame independent before columns are overwritten below.
medicine = medicine[medicine["stockout_days"] > 0].copy()

categorical_features = ["user_category", "zone", "medicine_name", "category", "corr_disease_1",  "corr_disease_2",  "corr_disease_3"]

# Keep every fitted encoder: the integer codes are only meaningful together
# with the encoder that produced them, so inference must reuse these objects
# instead of fitting fresh ones.
label_encoders = {}
for col in categorical_features:
    encoder = LabelEncoder()
    medicine[col] = encoder.fit_transform(medicine[col])
    label_encoders[col] = encoder

medicine["stock_per_visitor"] = medicine["stock"] / (medicine["avg_visitor_weekly"] + 1)  # +1 avoids division by zero

features = [
    "user_category", "zone", "medicine_name", "category", "stock",
    "restock_frequency", "supplier_reliability", "disease_score_1",
    "disease_score_2", "disease_score_3", "corr_disease_1", "corr_disease_2",
    "corr_disease_3", "avg_price", "avg_visitor_weekly", "price", "stock_per_visitor"
]

X = medicine[features]
y = medicine["stockout_days"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = xgb.XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Train the model.
model.fit(X_train, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test)

# Evaluate the model.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Persist the model together with the encoders it was trained with.
model.save_model("xgb_model.json")
print("Model berhasil disimpan sebagai 'xgb_model.json'")

joblib.dump(label_encoders, "label_encoders.joblib")
print("LabelEncoders berhasil disimpan sebagai 'label_encoders.joblib'")

print(f"MAE: {mae:.2f} days")
print(f"RMSE: {rmse:.2f} days")

✅ Model berhasil disimpan sebagai 'xgb_model.json'
MAE: 1.90 days


In [6]:
import pandas as pd
import numpy as np
import json
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# Restore the trained regressor from its JSON dump.
model = xgb.XGBRegressor()
model.load_model("xgb_model.json")

# Reference tables consulted by infer_stockout: the per-medicine master list
# and the historical stock records.
medicine_list, medicine_data = (
    pd.read_csv(csv_name) for csv_name in ("medicine_list.csv", "medicine.csv")
)

# Columns that the training cell label-encodes before fitting the model.
_CATEGORICAL_FEATURES = [
    "user_category", "zone", "medicine_name", "category",
    "corr_disease_1", "corr_disease_2", "corr_disease_3",
]

# Model input columns, in the order used at training time.
_MODEL_FEATURES = [
    "user_category", "zone", "medicine_name", "category", "stock",
    "restock_frequency", "supplier_reliability", "disease_score_1",
    "disease_score_2", "disease_score_3", "corr_disease_1", "corr_disease_2",
    "corr_disease_3", "avg_price", "avg_visitor_weekly", "price", "stock_per_visitor"
]

# Lazily built {column: fitted LabelEncoder}; see _get_training_encoders.
_training_encoders = None


def _fit_training_encoders(training_frame):
    """Fit one LabelEncoder per categorical column on the rows used for training.

    Applies the same row filter as the training cell (stock-out strictly more
    than zero whole days after the record) so the resulting category -> code
    mapping matches the one the model was fitted with, provided the CSV has
    not changed since training.
    """
    record_ts = pd.to_datetime(training_frame["record_timestamp"])
    stockout_ts = pd.to_datetime(training_frame["stockout_timestamp"])
    trained_rows = training_frame[(stockout_ts - record_ts).dt.days > 0]
    return {col: LabelEncoder().fit(trained_rows[col]) for col in _CATEGORICAL_FEATURES}


def _get_training_encoders():
    """Return the training-time encoders, fitting them once on first use."""
    global _training_encoders
    if _training_encoders is None:
        _training_encoders = _fit_training_encoders(medicine_data)
    return _training_encoders


def infer_stockout(json_input):
    """Predict the number of days until stock-out for one stock record.

    Parameters:
    - json_input: JSON string describing an object with the attributes user,
                  user_category, zone, medicine_name, stock, record_timestamp,
                  avg_visitor_weekly, price

    Returns:
    - JSON string (orient="records") with user, medicine_name (as given in the
      input, not its encoded value) and predicted_stockout_days.

    Raises:
    - ValueError: when no historical row exists for the medicine_name/zone
      combination, or when a categorical value was never seen in the
      training data and therefore cannot be encoded.
    """
    data = pd.DataFrame([json.loads(json_input)])
    medicine_name = data["medicine_name"].values[0]
    zone = data["zone"].values[0]

    def from_medicine_list(column):
        # Per-medicine attribute looked up in the master list (NaN if absent).
        return data["medicine_name"].map(
            dict(zip(medicine_list["medicine_name"], medicine_list[column]))
        )

    data["category"] = from_medicine_list("category")
    data["restock_frequency"] = from_medicine_list("avg_restock_frequency")

    # Mean historical supplier reliability for this medicine only; avoids
    # aggregating the whole table on every call.
    same_medicine = medicine_data["medicine_name"] == medicine_name
    data["supplier_reliability"] = medicine_data.loc[same_medicine, "supplier_reliability"].mean()

    relevant_data = medicine_data[same_medicine & (medicine_data["zone"] == zone)]
    if relevant_data.empty:
        raise ValueError("Data penyakit tidak ditemukan untuk kombinasi medicine_name dan zone.")

    # Disease context is taken from the first matching historical row.
    for column in ("disease_score_1", "disease_score_2", "disease_score_3",
                   "corr_disease_1", "corr_disease_2", "corr_disease_3"):
        data[column] = relevant_data[column].values[0]

    data["avg_price"] = from_medicine_list("avg_price")

    data["record_timestamp"] = pd.to_datetime(data["record_timestamp"])

    data["stock_per_visitor"] = data["stock"] / (data["avg_visitor_weekly"] + 1)  # +1 avoids division by zero

    # Encode into a separate frame with the training-time encoders. Fitting a
    # new encoder on this single row would map every category to 0, and
    # overwriting `data` would leak the integer code into the returned JSON.
    model_input = data.copy()
    for col, encoder in _get_training_encoders().items():
        try:
            model_input[col] = encoder.transform(data[col])
        except (ValueError, TypeError) as exc:
            raise ValueError(
                f"Nilai '{data[col].values[0]}' pada kolom '{col}' tidak ada di data training."
            ) from exc

    predictions = model.predict(model_input[_MODEL_FEATURES])

    data["predicted_stockout_days"] = predictions
    result_json = data[["user", "medicine_name", "predicted_stockout_days"]].to_json(orient="records")

    return result_json

In [7]:
import pathlib
import pkg_resources

# Snapshot the environment: one "name==version" pin per installed distribution.
# NOTE(review): pkg_resources is deprecated by setuptools in favour of
# importlib.metadata — consider migrating once the minimum Python version is confirmed.
installed_packages = {dist.key for dist in pkg_resources.working_set}

requirement_lines = [
    f"{name}=={pkg_resources.get_distribution(name).version}\n"
    for name in sorted(installed_packages)
]

with open('requirements.txt', 'w') as f:
    f.writelines(requirement_lines)
