# Introduction

This notebook demonstrates customer segmentation and RFM (Recency, Frequency, Monetary Value) analysis using our `CustomerFrequency` package. These techniques help a business understand its customers' behavior and improve its retention strategies.

## Setup

In [6]:
import random

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Import custom modules from our package
from CustomerFrequency.DataBase.data_generator import generate_customer, generate_menu, generate_orders, generate_employee, generate_transactions

# Ensure that all visualizations are plotted inline with the notebook
%matplotlib inline

# Data Generation

Generate synthetic employee, customer, menu, order, and transaction data using our `data_generator` module.

In [7]:
# Row counts for the synthetic tables generated in the cells below
NUMBER_OF_TRANSACTIONS = 2_000
NUMBER_OF_EMPLOYEES = 30
NUMBER_OF_CUSTOMERS = 1_500
NUMBER_OF_ORDERS = 150

In [8]:
# Build one employee record per id in 0 .. NUMBER_OF_EMPLOYEES - 1
employee_data = list(map(generate_employee, range(NUMBER_OF_EMPLOYEES)))

# Tabulate the generated records
employee_df = pd.DataFrame(employee_data)

# Preview the top rows of the table
print(employee_df.head())

# Persist the table as CSV, leaving out the index column
employee_df.to_csv('employee_data.csv', index=False)

   employee_Id first_name last_name                       email  builidingID  \
0            0       Mark      Lowe       gsullivan@example.com         7359   
1            1      Kayla  Anderson  joshuamarshall@example.com         6603   
2            2     Joseph    Hanson          gstout@example.org         4373   
3            3    Patrick     Reyes         kevin31@example.com         3150   
4            4    Jeffrey       Lee          lori69@example.net         3207   

       phone_number  
0  +374 (87) 526126  
1  +374 (96) 295022  
2  +374 (38) 081212  
3  +374 (94) 321398  
4  +374 (74) 134227  


In [9]:
# Build one customer record per id in 0 .. NUMBER_OF_CUSTOMERS - 1
customer_data = list(map(generate_customer, range(NUMBER_OF_CUSTOMERS)))

# Tabulate the generated records
customer_df = pd.DataFrame(customer_data)

# Preview the top rows of the table
print(customer_df.head())

# Persist the table as CSV, leaving out the index column
customer_df.to_csv('customer_data.csv', index=False)

   customer_id first_name last_name      phone_number
0            0   Jennifer    Martin  +374 (76) 826999
1            1    Melissa     Craig  +374 (85) 285434
2            2        Roy   Hammond  +374 (59) 403635
3            3     Connie    Taylor  +374 (37) 582973
4            4   Savannah    Adkins  +374 (96) 843228


In [10]:
# Build one menu record per id in 1 .. 100
menu_data = list(map(generate_menu, range(1, 101)))

# Tabulate the generated records
menu_df = pd.DataFrame(menu_data)

# Preview the top rows of the table
print(menu_df.head())

# Persist the table as CSV, leaving out the index column
menu_df.to_csv('menu_data.csv', index=False)

   menu_id                                    name price  size
0        1                    Cinnamon Dolce Latte   400   0.5
1        2                          Iced Black Tea  1200  0.75
2        3                             Caffè Latte   350   0.5
3        4  Cinnamon Caramel Cream Nitro Cold Brew   600  0.25
4        5                     Caramel Frappuccino   450   0.5


# Generate NUMBER_OF_ORDERS orders, each linking a random menu item to a random customer.
order_data = []
for order_id in range(1, NUMBER_OF_ORDERS + 1):
    # Menu ids were generated as 1 .. len(menu_data), so an inclusive randint matches them.
    menu_id = random.randint(1, len(menu_data))
    # Customer ids were generated with range(NUMBER_OF_CUSTOMERS), i.e. 0 .. n - 1.
    # randrange(n) covers exactly that range; randint(1, n) would skip customer 0
    # and could return n, which matches no generated customer.
    customer_id = random.randrange(len(customer_data))

    order = generate_orders(order_id, menu_id, customer_id)
    order_data.append(order)

# Convert the list of order data to a DataFrame
order_df = pd.DataFrame(order_data)

# Display the first few rows of the DataFrame
print(order_df.head())

# Write the DataFrame to a CSV file
order_df.to_csv('order_data.csv', index=False)

In [None]:
# Generate NUMBER_OF_TRANSACTIONS transactions, each tied to a random customer and employee.
transactions_data = []

for transaction_id in range(1, NUMBER_OF_TRANSACTIONS + 1):
    # Customer and employee ids were both generated with range(...), i.e. 0 .. n - 1.
    # randrange(n) covers exactly that range; randint(1, n) would skip id 0 and
    # could return n, which matches no generated row.
    customer_id = random.randrange(len(customer_data))
    employee_id = random.randrange(len(employee_data))

    transaction = generate_transactions(transaction_id, customer_id, employee_id)
    transactions_data.append(transaction)

# Convert the list of transaction data to a DataFrame
transactions_df = pd.DataFrame(transactions_data)

# Display the first few rows of the DataFrame
print(transactions_df.head())

# Write the DataFrame to a CSV file
transactions_df.to_csv('transactions_data.csv', index=False)
  

# Customer Segmentation Scenario

In this scenario, we demonstrate how our module segments customers based on their purchasing behavior, namely how frequently they make a purchase.

# Load the data
# NOTE(review): the generation cells in this notebook write 'customer_data.csv' and
# 'transactions_data.csv' to the working directory, while this cell reads from
# 'CustomerFrequency/Data/' - confirm these are the intended input files.
# NOTE(review): `customer_data` is rebound here from the generated list of records
# to a DataFrame; cells that use `len(customer_data)` see whichever was assigned last.
customer_data = pd.read_csv('CustomerFrequency/Data/customer_data.csv')
transaction_data = pd.read_csv('CustomerFrequency/Data/transactions_data.csv')

In [None]:
# Data Pre-processing

# Convert the payment date column to pandas datetimes
transaction_data['date_of_payment'] = pd.to_datetime(transaction_data['date_of_payment'])

# Count how many transactions reference each customer id
transactions_per_customer = transaction_data['customer_id'].value_counts().reset_index()
transactions_per_customer.columns = ['customer_id', 'transaction_count']

# The left merge keeps customers that have no transactions at all. Only their
# missing count is set to 0: a blanket fillna(0) on the merged frame would also
# overwrite missing values in unrelated columns. The count is cast back to int
# because the NaNs introduced by the merge turn the column into floats.
customer_transaction_data = pd.merge(customer_data, transactions_per_customer, on='customer_id', how='left')
customer_transaction_data['transaction_count'] = (
    customer_transaction_data['transaction_count'].fillna(0).astype(int)
)
customer_transaction_data.head()

In [None]:
# Define customer segments based on transaction count
def categorize_customer(x):
    """Map a customer's transaction count to a segment label.

    Args:
        x: Number of transactions recorded for the customer.

    Returns:
        'Frequent Buyer' when x is greater than 4, 'Occasional Buyer' when x is
        between 2 and 4 inclusive, 'One-Time Buyer' when x equals 1, and
        'Inactive' for every other value.
    """
    if x > 4:
        return 'Frequent Buyer'
    if 2 <= x <= 4:
        return 'Occasional Buyer'
    if x == 1:
        return 'One-Time Buyer'
    # Everything that matched no rule above (e.g. 0 transactions)
    return 'Inactive'

# Label every customer with a segment derived from their transaction count
customer_transaction_data['customer_segment'] = customer_transaction_data['transaction_count'].map(categorize_customer)

# Count customers per segment, then show the counts next to a preview of the labelled table
customer_segment_distribution = customer_transaction_data['customer_segment'].value_counts()
customer_segment_distribution, customer_transaction_data.head()

The output above shows how many customers fall into each segment, based on their purchasing frequency.

Below, we can see the visualization of the segments:

In [None]:
# Pass the per-segment customer counts to the visualization helper.
# NOTE(review): `visualize_customer_segments` is not imported in any cell visible
# here - confirm it is brought into scope before this cell runs.
visualize_customer_segments(customer_segment_distribution)