In [2]:
# customer_satisfaction_model_training.py
import os
import joblib
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier  # xgboost==1.7.6

# Load the raw survey data and drop every row that contains a missing value.
df = pd.read_csv("data/Passenger_Satisfaction.csv").dropna()

# Encode categorical columns as integers.
# Each column gets its OWN LabelEncoder. A single shared instance is re-fit on
# every iteration and ends up remembering only the last column's classes, so
# the string -> integer mapping of the other columns could not be reproduced
# when the saved models are later applied to new, raw data.
categorical_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class', 'satisfaction']
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Backward compatibility: `le` has always ended up fitted on the target column
# (the last one encoded), so keep that name bound to the same kind of object.
le = label_encoders['satisfaction']

# Features are everything except the row identifier and the target.
X = df.drop(columns=['id', 'satisfaction'])
y = df['satisfaction']

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features. The scaler is fitted on the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the preprocessing artifacts (scaler and per-column label encoders) so
# that the exact same transformations can be re-applied at prediction time.
model_dir = Path("customer_satisfaction_prediction/models")
model_dir.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, model_dir / "scaler_compressed.pkl", compress=("xz", 3))
joblib.dump(label_encoders, model_dir / "label_encoders_compressed.pkl", compress=("xz", 3))

# Candidate models, keyed by the name used for each saved artifact file.
# Seed for the estimators that draw random numbers; it matches the seed passed
# to train_test_split so that repeated runs are reproducible.
RANDOM_STATE = 42

classifiers = {
    "logistic_regression": LogisticRegression(max_iter=1000),
    "random_forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "gradient_boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    # `use_label_encoder` is intentionally not passed: it is a deprecated
    # XGBoost argument (no effect with the pinned 1.7.x release) and the
    # target is already integer-encoded before training.
    "xgboost": XGBClassifier(eval_metric="logloss", random_state=RANDOM_STATE),
    "knn": KNeighborsClassifier(),
}

# Fit every candidate on the scaled training split, then persist it as an
# xz-compressed pickle in the model directory and report the file size.
BYTES_PER_MB = 1024 * 1024

for name, model in classifiers.items():
    print(f"\n🔧 Training and saving: {name} ...")
    model.fit(X_train_scaled, y_train)

    # One artifact per classifier, named after its key in `classifiers`.
    model_path = model_dir.joinpath(f"{name}_compressed.pkl")
    joblib.dump(model, model_path, compress=("xz", 3))

    # Size on disk of the file that was just written, converted to MiB.
    size_mb = os.path.getsize(model_path) / BYTES_PER_MB
    print(f"✅ Saved {model_path.name} - Size: {size_mb:.2f} MB")





🔧 Training and saving: logistic_regression ...
✅ Saved logistic_regression_compressed.pkl - Size: 0.00 MB

🔧 Training and saving: random_forest ...
✅ Saved random_forest_compressed.pkl - Size: 7.28 MB

🔧 Training and saving: gradient_boosting ...
✅ Saved gradient_boosting_compressed.pkl - Size: 0.05 MB

🔧 Training and saving: xgboost ...
✅ Saved xgboost_compressed.pkl - Size: 0.11 MB

🔧 Training and saving: knn ...
✅ Saved knn_compressed.pkl - Size: 1.80 MB
