# Customer Lifetime Value (CLV) Prediction

This notebook predicts Customer Lifetime Value (CLV): the future value of a customer to a business over the entire duration of their relationship. It trains a random forest regressor on customer-level features such as past purchase history, frequency of purchases, and customer demographics.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the customer records into a DataFrame.
# NOTE: 'customer_data.csv' is a placeholder path -- point it at your own file.
data_path = 'customer_data.csv'
df = pd.read_csv(data_path)

# Preview the first rows to sanity-check what was loaded.
df.head()

## Data Preprocessing
Drop rows that contain missing values and one-hot encode the categorical variables.

In [None]:
# Name of the column the model is trained to predict.
TARGET_COLUMN = 'Customer_Lifetime_Value'

# Fail early with an actionable message if the dataset lacks the target.
if TARGET_COLUMN not in df.columns:
    raise KeyError(
        f"Expected a '{TARGET_COLUMN}' column in the dataset; "
        f"found columns: {list(df.columns)}"
    )

# Handle missing values: drop every row that has at least one missing entry.
df = df.dropna()

# Separate the target BEFORE encoding so that get_dummies can never expand it
# into indicator columns (that would replace the target column with dummies
# and break the `y` lookup).
y = df[TARGET_COLUMN]

# Encode categorical variables among the predictors only. drop_first=True
# omits one indicator level per encoded column.
X = pd.get_dummies(df.drop(columns=[TARGET_COLUMN]), drop_first=True)

# Feature selection: every encoded predictor column is used as a feature.
features = X.columns.tolist()

# Keep `df` as the encoded frame (features plus target) for any later cells
# that read it; the target column is now the last column.
df = pd.concat([X, y], axis=1)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Training

In [None]:
# Build a 100-tree random forest with a fixed seed, then fit it on the
# training split.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

## Model Evaluation

In [None]:
# Generate predictions for the held-out split.
y_pred = model.predict(X_test)

# Report the mean squared error, then the coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)
r2 = r2_score(y_test, y_pred)
print("R^2 Score:", r2)