# Introduction

This notebook will demonstrate the application of Customer Segmentation and RFM (Recency, Frequency, Monetary Value) Analysis using our  package. These techniques are important to understand behaviors of the customers and improve retention strategies in the business' markets.

## Setup

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random 

# Import custom modules from our package
from CustomerFrequency.DataBase.data_generator import generate_customer, generate_menu, generate_orders, generate_employee, generate_transactions

# Ensure that all visualizations are plotted inline with the notebook
%matplotlib inline

# Data Generation

Generate data using our data_generator module

In [5]:
NUMBER_OF_TRANSACTIONS=2000
NUMBER_OF_EMPLOYEES=30
NUMBER_OF_CUSTOMERS=1500
NUMBER_OF_ORDERS=150

In [7]:
employee_data = [generate_employee(employee_id) for employee_id in range(NUMBER_OF_EMPLOYEES)]

# Convert the list of employee data to a DataFrame
employee_df = pd.DataFrame(employee_data)

# Display the first few rows of the DataFrame
print(employee_df.head())

# Write the DataFrame to a CSV file
employee_df.to_csv('employee_data.csv', index=False)

   employee_Id first_name last_name                      email  builidingID  \
0            0      Alvin      Cruz         mlopez@example.com         4365   
1            1   Patricia   Everett    susanromero@example.com         7591   
2            2    Jessica   Osborne  jensenmatthew@example.com         5948   
3            3     Regina    Watson     kimberly14@example.org         7856   
4            4      Aaron  Robinson     ryanpowell@example.net         3042   

       phone_number  
0  +374 (19) 664561  
1  +374 (76) 766040  
2  +374 (50) 792637  
3  +374 (62) 595882  
4  +374 (66) 440717  


In [8]:
customer_data = [generate_customer(customer_id) for customer_id in range(NUMBER_OF_CUSTOMERS)]

# Convert the list of customer data to a DataFrame
customer_df = pd.DataFrame(customer_data)

# Display the first few rows of the DataFrame
print(customer_df.head())

# Write the DataFrame to a CSV file
customer_df.to_csv('customer_data.csv', index=False)

   customer_id first_name last_name      phone_number
0            0    Jessica     Logan  +374 (15) 040714
1            1       Todd     Glass  +374 (36) 606094
2            2      Diane  Schaefer  +374 (44) 008644
3            3      Amber     Perez  +374 (27) 555589
4            4     Donald     Greer  +374 (93) 139155


In [9]:
menu_data = [generate_menu(menu_id) for menu_id in range(1, 101)]

# Convert the list of menu data to a DataFrame
menu_df = pd.DataFrame(menu_data)

# Display the first few rows of the DataFrame
print(menu_df.head())

# Write the DataFrame to a CSV file
menu_df.to_csv('menu_data.csv', index=False)

   menu_id                                   name price  size
0        1          Iced Peach Green Tea Lemonade   400  0.25
1        2                   Iced Peach Green Tea   550  0.25
2        3  Iced Hazelnut Oatmilk Shaken Espresso   650  0.75
3        4                  Chocolate Chip Cookie   550  0.25
4        5                Iced Black Tea Lemonade   600  0.25


order_data = [] 
for order_id in range(1, NUMBER_OF_ORDERS + 1):
    menu_id = random.randint(1, len(menu_data))
    customer_id = random.randint(1, len(customer_data))

    order = generate_orders(order_id, menu_id, customer_id)
    order_data.append(order)

# Convert the list of order data to a DataFrame
order_df = pd.DataFrame(order_data)

# Display the first few rows of the DataFrame
print(order_df.head())

# Write the DataFrame to a CSV file
order_df.to_csv('order_data.csv', index=False)

In [13]:
transactions_data = []

for transaction_id in range(1, NUMBER_OF_TRANSACTIONS + 1):
    customer_id = random.randint(1, len(customer_data))
    employee_id = random.randint(1, len(employee_data))
    
    transaction = generate_transactions(transaction_id, customer_id, employee_id)
    transactions_data.append(transaction)

# Convert the list of transaction data to a DataFrame
transactions_df = pd.DataFrame(transactions_data)

# Display the first few rows of the DataFrame
print(transactions_df.head())

# Write the DataFrame to a CSV file
transactions_df.to_csv('transactions_data.csv', index=False)
  

   transaction_id            date_of_payment  customer_id  employee_id  \
0               1 2021-08-11 17:34:31.276623         1225           29   
1               2 2020-03-23 08:56:55.481713         1023           11   
2               3 2023-08-19 02:30:23.556348         1266           22   
3               4 2023-07-30 01:07:54.102100          220            4   
4               5 2023-09-03 07:49:11.894799          691            6   

   amount        type  
0   25570        visa  
1    2910        cash  
2   18500  mastercard  
3   23810    applepay  
4    7890        cash  


# Customer Segmentation Scenario

In this scenario, we will demonstrate how our module will segment customers based on their purchasing behavior - how frequently they purchase the product.

# Load the data
customer_data = pd.read_csv('CustomerFrequency/Data/customer_data.csv')
transaction_data = pd.read_csv('CustomerFrequency/Data/transactions_data.csv')

In [17]:
# Data Pre-processing

transactions_data['date_of_payment'] = pd.to_datetime(transactions_data['date_of_payment'])

transactions_per_customer = transactions_data['customer_id'].value_counts().reset_index()
transactions_per_customer.columns = ['customer_id', 'transaction_count']

customer_transaction_data = pd.merge(customer_data, transactions_per_customer, on='customer_id', how='left').fillna(0)
customer_transaction_data.head()

TypeError: list indices must be integers or slices, not str

In [None]:
# Define customer segments based on transaction count
def categorize_customer(x):
    if x > 4:
        return 'Frequent Buyer'
    elif x >= 2 and x <= 4:
        return 'Occasional Buyer'
    elif x == 1:
        return 'One-Time Buyer'
    else:
        return 'Inactive'

# Apply the categorization function
customer_transaction_data['customer_segment'] = customer_transaction_data['transaction_count'].apply(categorize_customer)

# Display the distribution of customer segments
customer_segment_distribution = customer_transaction_data['customer_segment'].value_counts()
customer_segment_distribution, customer_transaction_data.head()

So, here we can see the customer segments, based on their purchasing frequency.

Below, we can see the visualization of the segments:

In [None]:
visualize_customer_segments(customer_segment_distribution)