In [1]:
# 📦 Import Libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import joblib
import os

# 📌 Load Your Processed CLTV Data
# The CSV is expected to provide these columns:
# ['Customer_ID', 'Frequency', 'Recency', 'Monetary', 'AOV', 'PF', 'ProfitMargin', 'CLTV']
# The feature/target names are declared once so that the cleaning step and the
# feature selection below cannot drift apart.
FEATURE_COLUMNS = ['Frequency', 'Recency', 'Monetary', 'AOV', 'PF', 'ProfitMargin']
TARGET_COLUMN = 'CLTV'
MODEL_DIR = 'models'
MODEL_PATH = 'models/cltv_model.pkl'

cltv = pd.read_csv('cltv_dataset.csv')

# 🧼 Drop rows missing any feature or the target, so only complete rows are used
cltv.dropna(subset=FEATURE_COLUMNS + [TARGET_COLUMN], inplace=True)

# 🎯 Define Features and Target
X = cltv[FEATURE_COLUMNS]
y = cltv[TARGET_COLUMN]

# 🔀 Train/Test Split (80/20; fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 🤖 Train Model
model = LinearRegression()
model.fit(X_train, y_train)

# 📊 Evaluate on the held-out split so the 20% set aside above is actually used;
# LinearRegression.score returns the coefficient of determination (R²).
test_r2 = model.score(X_test, y_test)
print(f"📊 Held-out R² score: {test_r2:.4f}")

# ✅ Save the Model
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f"✅ CLTV model saved to '{MODEL_PATH}'")

# (Optional) Predict CLTV and Add to DataFrame
# NOTE: predictions cover every row of X, including the rows the model was
# trained on, so this column is not an out-of-sample estimate.
cltv['Predicted_CLTV'] = model.predict(X)

# (Optional) Save Predicted Results
cltv.to_csv('cltv_with_predictions.csv', index=False)
print("📁 Saved: cltv_with_predictions.csv")


✅ CLTV model saved to 'models/cltv_model.pkl'
📁 Saved: cltv_with_predictions.csv
