# CLV & Churn Model Training
This notebook trains a model to predict Customer Lifetime Value (CLV) and churn probability, then saves it as `clv_model.pkl`.

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
import joblib

In [2]:
np.random.seed(42)
n_samples = 500

In [3]:
data = pd.DataFrame({
    'age': np.random.randint(18, 70, n_samples),
    'income': np.random.randint(20000, 150000, n_samples),
    'tenure': np.random.randint(1, 10, n_samples),
    'num_purchases': np.random.randint(1, 50, n_samples),
})

In [4]:
data['clv'] = (
    data['income'] * 0.05 +
    data['tenure'] * 10 +
    data['num_purchases'] * 2 +
    np.random.normal(0, 100, n_samples)
).astype(int)

data['churn'] = np.where(
    (data['tenure'] < 3) & (data['num_purchases'] < 10), 1, 0
)

In [5]:
X_clv = data[['age', 'income', 'tenure', 'num_purchases']]
y_clv = data['clv']

X_clv_train, X_clv_test, y_clv_train, y_clv_test = train_test_split(X_clv, y_clv, test_size=0.2, random_state=42)

clv_model = LinearRegression()
clv_model.fit(X_clv_train, y_clv_train)

y_clv_pred = clv_model.predict(X_clv_test)
clv_mse = mean_squared_error(y_clv_test, y_clv_pred)

In [6]:
joblib.dump(clv_model, 'clv_model.pkl')

['clv_model.pkl']

In [7]:
y_churn = data['churn']

X_churn_train, X_churn_test, y_churn_train, y_churn_test = train_test_split(X_clv, y_churn, test_size=0.2, random_state=42)

churn_model = LogisticRegression()
churn_model.fit(X_churn_train, y_churn_train)

y_churn_pred = churn_model.predict(X_churn_test)
churn_acc = accuracy_score(y_churn_test, y_churn_pred)

In [8]:
joblib.dump(churn_model, 'churn_model.pkl')

['churn_model.pkl']

In [9]:
print("CLV Model Mean Squared Error:", clv_mse)
print("Churn Model Accuracy:", churn_acc)

CLV Model Mean Squared Error: 10886.70675393371
Churn Model Accuracy: 0.97
