# Customer Lifetime Value (LTV) Prediction Model

**Objective:** Predict each customer's lifetime value (LTV) from their purchase behavior — purchase frequency, recency, and average order value (AOV) — to support targeted marketing.

**Tools Used:** Python (scikit-learn, XGBoost), pandas, NumPy, seaborn, Matplotlib

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Simulate dataset
# Build a reproducible synthetic customer table. The global NumPy RNG is
# seeded once, so the draws below must stay in this order (Frequency,
# Recency, AOV, then the LTV noise) to reproduce the same values.
np.random.seed(42)
n_customers = 200

data = pd.DataFrame({'CustomerID': np.arange(1, n_customers + 1)})
# Purchase counts: Poisson-distributed with mean 10.
data['Frequency'] = np.random.poisson(lam=10, size=n_customers)
# Uniform integers in 1..364 (the upper bound of randint is exclusive),
# so the division below never hits zero.
data['Recency'] = np.random.randint(low=1, high=365, size=n_customers)
# Average order value, uniform on [20, 500), rounded to two decimals.
data['AOV'] = np.random.uniform(low=20, high=500, size=n_customers).round(2)

# Target: (Frequency * AOV / Recency) * 30 plus Gaussian noise (std 100).
# The noise term means individual LTV values can come out negative.
data['LTV'] = (
    data['Frequency']
    .mul(data['AOV'])
    .div(data['Recency'])
    .mul(30)
    .add(np.random.normal(loc=0, scale=100, size=n_customers))
)
data.head()

In [None]:
# Feature selection
# Predictors are the three behavioural columns; the target is LTV.
# CustomerID is left out of the predictors.
y = data['LTV']
X = data.loc[:, ['Frequency', 'Recency', 'AOV']]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train model
# Fit a 100-tree random forest on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained), then
# score the held-out rows.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

y_pred = model.predict(X_test)

In [None]:
# Evaluation
# Report two error metrics on the held-out set, both in LTV units:
# MAE (mean absolute error) and RMSE (square root of the mean squared error).
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f'MAE: {mae:.2f}, RMSE: {rmse:.2f}')

In [None]:
# Visualize results
# Scatter of predicted against actual LTV for the held-out customers.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)

# Explicit labels replace the defaults seaborn derives from the inputs.
ax.set_xlabel('Actual LTV')
ax.set_ylabel('Predicted LTV')
ax.set_title('Actual vs Predicted LTV')
ax.grid(True)

plt.show()