In [None]:
import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Read the raw coupon records from disk.
df = pd.read_csv("data/coupon_dummy.csv")

# Binary target: 1 when the cumulative quantity reaches at least 80% of the
# forecasted quantity, else 0.  A NaN on either side compares False, so rows
# with a missing quantity end up labelled 0.
df["SUCCESS"] = df["CUMULATIVEQTY"].ge(df["FORECASTEDQTY"].mul(0.8)).astype(int)

# Model inputs (column order is kept as listed) and the label column.
feature_columns = [
    "FACEVALUE",
    "MTHSACTIVE",
    "DISTQUANTITY",
    "MEDIA",
    "BRAND",
    "PRODUCTSELLINGPRICE",
    "BUDGET_DOLLARS",
    "FORECASTEDQTY",
]
X = df[feature_columns]
y = df["SUCCESS"]

# Preprocessing
# Column groups: each feature column selected into X is handled by exactly one
# of the two transformers below.
numeric_features = [
    "FACEVALUE",
    "MTHSACTIVE",
    "DISTQUANTITY",
    "PRODUCTSELLINGPRICE",
    "BUDGET_DOLLARS",
    "FORECASTEDQTY",
]

categorical_features = ["MEDIA", "BRAND"]

preprocessor = ColumnTransformer(
    transformers=[
        # Standardize numeric columns (zero mean, unit variance).  The
        # classifier below is a default LogisticRegression, whose L2 penalty
        # and lbfgs solver are both sensitive to feature scale; feeding raw
        # columns through unchanged lets large-magnitude columns dominate the
        # penalty and tends to cause convergence warnings.
        ("num", StandardScaler(), numeric_features),
        # One-hot encode categoricals; categories not seen during fit are
        # encoded as all zeros instead of raising at predict time.
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ]
)

# Model
# Scaling and encoding live inside the pipeline, so the fitted (and saved)
# object still accepts the same raw feature columns as before.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression()),
    ]
)

# Train
# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model.fit(X_train, y_train)

# Evaluate on the held-out rows so the test split is actually used and the
# model's quality is visible before it is persisted.  Pipeline.score delegates
# to the classifier's score, i.e. mean accuracy.
test_accuracy = model.score(X_test, y_test)
print(f"Held-out accuracy: {test_accuracy:.3f}")

# Save model
model_path = "model/coupon_success_model.pkl"

# joblib.dump does not create missing parent directories; without this the
# script fails with FileNotFoundError on a fresh checkout where "model/" does
# not exist yet.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)

print("Model trained and saved.")
