In [1]:
import pandas as pd
import random

In [10]:
# Generate synthetic customer data
num_customers = 1000
customer_ids = [f"CUST{i}" for i in range(num_customers)]
customer_names = [f"Customer {i}" for i in range(num_customers)]
segments = ["Consumer", "Corporate", "Home Office"]
countries = ["United States", "Canada", "United Kingdom"]
cities = ["New York", "Toronto", "London"]
states = ["NY", "ON", "LDN"]
regions = ["East", "North", "West"]
cac_expenses = [random.randint(1000, 10000) for _ in range(num_customers)]
churned_customers = random.sample(customer_ids, int(num_customers * 0.2))

In [11]:
# Create a DataFrame for customer data
# This code snippet creates a pandas DataFrame called customer_data with columns
# representing different customer attributes such as customer ID, customer name,
# segment, country, city, state, region, CAC expenses, and churned status.
# The calues for these columns are generated randomly based on given input variables.

customer_data = pd.DataFrame({
    "Customer ID": customer_ids,
    "Customer Name": customer_names,
    "Segment": random.choices(segments, k=num_customers),
    "Country": random.choices(countries, k=num_customers),
    "City": random.choices(cities, k=num_customers),
    "State": random.choices(states, k=num_customers),
    "Region": random.choices(regions, k=num_customers),
    "CAC Expenses": cac_expenses,
    "Churned": [True if cust_id in churned_customers else False for cust_id in customer_ids]
})

In [12]:
customer_data.to_csv('customer_data.csv', index=False)

In [13]:
# Generate random transaction data
num_transactions = 5000
transaction_dates = pd.date_range(start="2022-01-01", periods=num_transactions, freq="D")
transaction_customer_ids = random.choices(customer_ids, k=num_transactions)
purchase_values = [random.uniform(10, 500) for _ in range(num_transactions)]
purchase_frequencies = [random.randint(1, 10) for _ in range(num_transactions)]
transaction_data = pd.DataFrame({
    "Transaction Date": transaction_dates,
    "Customer ID": transaction_customer_ids,
    "Purchase Value": purchase_values,
    "Purchase Frequency": purchase_frequencies
})

In [14]:
transaction_data.to_csv('transaction_data.csv', index=False)

In [15]:
transaction_data.head()

Unnamed: 0,Transaction Date,Customer ID,Purchase Value,Purchase Frequency
0,2022-01-01,CUST652,335.318162,10
1,2022-01-02,CUST499,224.908967,6
2,2022-01-03,CUST476,203.423891,5
3,2022-01-04,CUST647,258.174556,2
4,2022-01-05,CUST730,496.357364,5


In [16]:
# Calculate CLTV based on synthetic transaction data
## This code snippet groups the transaction_data DataFrame by "Customer ID" and calculates several
# aggregate statistics such as the total sales, total transactions, avegare purchase value,
# and average purchase frequency for each customer. The results are stores in a new DataFrame
## called transaction_data_grouped

transaction_data_grouped = transaction_data.groupby("Customer ID").agg(
    TotalSales=("Purchase Value", "sum"),
    TotalTransactions=("Purchase Value", "count"),
    AvgPurchaseValue=("Purchase Value", "mean"),
    AvgPurchaseFrequency=("Purchase Frequency", "mean")
).reset_index()

In [17]:
transaction_data_grouped.head()

Unnamed: 0,Customer ID,TotalSales,TotalTransactions,AvgPurchaseValue,AvgPurchaseFrequency
0,CUST0,1084.059037,4,271.014759,8.25
1,CUST1,754.031687,3,251.343896,5.666667
2,CUST10,1701.469592,5,340.293918,5.2
3,CUST100,1835.064612,7,262.152087,6.714286
4,CUST101,2308.272753,8,288.534094,4.875


In [18]:
# This code calculates the average customer lifespan and customer lifetime value (CLTV)
# based on transaction data. The average customer lifespan is calculated by dividing the
# total number of transactions by the average purchase frequency. The CLTV is calculated
# by multiplying the average purchase value, average purchase frequency, and average
# customer lifespan.

transaction_data_grouped["AvgCustomerLifespan"] = transaction_data_grouped["TotalTransactions"] / transaction_data_grouped["AvgPurchaseFrequency"]
transaction_data_grouped["CLTV"] = transaction_data_grouped["AvgPurchaseValue"] * transaction_data_grouped["AvgPurchaseFrequency"] * transaction_data_grouped["AvgCustomerLifespan"]


In [19]:
transaction_data_grouped.head()

Unnamed: 0,Customer ID,TotalSales,TotalTransactions,AvgPurchaseValue,AvgPurchaseFrequency,AvgCustomerLifespan,CLTV
0,CUST0,1084.059037,4,271.014759,8.25,0.484848,1084.059037
1,CUST1,754.031687,3,251.343896,5.666667,0.529412,754.031687
2,CUST10,1701.469592,5,340.293918,5.2,0.961538,1701.469592
3,CUST100,1835.064612,7,262.152087,6.714286,1.042553,1835.064612
4,CUST101,2308.272753,8,288.534094,4.875,1.641026,2308.272753


In [9]:
transaction_data_grouped[["Customer ID", "CLTV"]].head()

Unnamed: 0,Customer ID,CLTV
0,CUST0,819.689744
1,CUST1,1574.821909
2,CUST10,2861.34225
3,CUST100,1247.810649
4,CUST101,2275.97845


In [10]:
customer_data.head()

Unnamed: 0,Customer ID,Customer Name,Segment,Country,City,State,Region,CAC Expenses,Churned
0,CUST0,Customer 0,Consumer,United Kingdom,London,NY,West,5378,False
1,CUST1,Customer 1,Home Office,Canada,Toronto,NY,East,5210,False
2,CUST2,Customer 2,Corporate,Canada,New York,NY,North,7748,True
3,CUST3,Customer 3,Consumer,Canada,Toronto,NY,East,1173,True
4,CUST4,Customer 4,Corporate,United Kingdom,London,LDN,East,7263,False


In [11]:
# Display sample customer data and CLTV calculation
print("Sample Customer Data:")
print(customer_data.head())

print("\nSample Transaction Data:")
print(transaction_data.head())

print("\nCalculated CLTV:")
print(transaction_data_grouped[["Customer ID", "CLTV"]].head())


Sample Customer Data:
  Customer ID Customer Name      Segment         Country      City State  \
0       CUST0    Customer 0     Consumer  United Kingdom    London    NY   
1       CUST1    Customer 1  Home Office          Canada   Toronto    NY   
2       CUST2    Customer 2    Corporate          Canada  New York    NY   
3       CUST3    Customer 3     Consumer          Canada   Toronto    NY   
4       CUST4    Customer 4    Corporate  United Kingdom    London   LDN   

  Region  CAC Expenses  Churned  
0   West          5378    False  
1   East          5210    False  
2  North          7748     True  
3   East          1173     True  
4   East          7263    False  

Sample Transaction Data:
  Transaction Date Customer ID  Purchase Value  Purchase Frequency
0       2022-01-01     CUST376      146.837956                   3
1       2022-01-02     CUST753      215.157200                  10
2       2022-01-03     CUST422      364.572372                   6
3       2022-01-04     