In [3]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
from dateutil.relativedelta import relativedelta
from sqlalchemy import create_engine


In [4]:
# Database connection. The full SQLAlchemy URL (user, password, host, database)
# is read from the WEB_DB_URL environment variable so that credentials are never
# stored in the notebook or leaked through version control.
db_url = os.environ.get('WEB_DB_URL')
if not db_url:
    raise RuntimeError(
        'Set the WEB_DB_URL environment variable to the SQLAlchemy connection URL '
        '(mysql+pymysql://<user>:<password>@<host>/<database>) before running this cell.'
    )
engine = create_engine(db_url)

# Load the three raw tables used by the analysis.
products_data = pd.read_sql_query('select * from products', engine)
orders_data = pd.read_sql_query("select * from orders", engine)
order_details_data = pd.read_sql_query("select * from order_details", engine)


# Convert the order timestamps to pandas datetimes so they can be compared later.
orders_data['o_time'] = pd.to_datetime(orders_data['o_time'])
orders_data['paid_time'] = pd.to_datetime(orders_data['paid_time'])
# Join the tables (pd.merge defaults to an inner join):
# orders -> order_details on the order id, then -> products on the product id.
# The result has one row per order line, with the order-level columns repeated.
merged_data = pd.merge(orders_data,order_details_data,left_on='o_id',right_on='order_id')
merged_data = pd.merge(merged_data,products_data,left_on='product_id',right_on='p_id')

merged_data.head(1)




Unnamed: 0,o_id,status,paid_time,o_time,total_price,user_id,order_detail_id,quantity,current_single_price,order_id,product_id,shop_id,p_id,p_name,brand,p_type_id
0,1,1,2022-07-01 04:55:00,2022-07-01 04:48:00,15000.0,2,1001,2,4999.0,1,1,1,1,手机,华为,1


In [25]:
# Keep only the purchase records of one shop that were paid within the last year.
SHOP_ID = 10  # shop whose customers are analysed
current_time = pd.Timestamp.now()
one_year_ago = current_time - relativedelta(years=1)
# The original name said "two years" although the offset has always been one year;
# the alias is kept only so that any other cell still using the old name keeps working.
two_years_ago = one_year_ago
# Series.between is inclusive on both ends; rows whose paid_time is missing (NaT)
# compare as False and are therefore excluded.
paid_in_window = merged_data['paid_time'].between(one_year_ago, current_time)
filtered_data = merged_data[paid_in_window & (merged_data['shop_id'] == SHOP_ID)]
filtered_data.head(5)


Unnamed: 0,o_id,status,paid_time,o_time,total_price,user_id,order_detail_id,quantity,current_single_price,order_id,product_id,shop_id,p_id,p_name,brand,p_type_id
2,2,1,2023-07-01 08:20:00,2023-07-01 08:14:00,13446.0,88,1,3,5000.0,2,1,10,1,手机,华为,1
3,23,1,2023-07-04 16:14:00,2023-07-04 15:47:00,5000.0,15,23,1,5000.0,23,1,10,1,手机,华为,1
4,33,1,2023-07-06 03:09:00,2023-07-06 02:42:00,5000.0,249,33,1,5000.0,33,1,10,1,手机,华为,1
5,34,1,2023-07-06 08:46:00,2023-07-06 08:38:00,15000.0,131,34,3,5000.0,34,1,10,1,手机,华为,1
6,41,1,2023-07-08 04:15:00,2023-07-08 04:03:00,15000.0,18,41,3,5000.0,41,1,10,1,手机,华为,1


In [None]:

# Build the RFM table: one row per customer with Recency, Frequency and Monetary.
#
# filtered_data holds one row per order *line*, so the order-level columns
# (paid_time, total_price) are repeated for every line of the same order.
# Collapse to one row per order first; otherwise Frequency would count order
# lines instead of orders and Monetary would add the same order total once per line.
orders_in_window = filtered_data.drop_duplicates(subset='o_id')

# Single reference instant so every customer's recency is measured from the same moment.
snapshot_time = pd.Timestamp.now()

# One grouped pass keeps the three metrics aligned by customer, instead of
# stitching together separate groupby results by row position.
customer_stats = (
    orders_in_window
    .groupby('user_id')
    .agg(
        last_purchase_time=('paid_time', 'max'),  # R: most recent payment time
        Frequency=('o_id', 'nunique'),            # F: number of distinct orders
        Monetary=('total_price', 'sum'),          # M: sum of the order totals
    )
    .reset_index()
)
# NOTE(review): total_price is the total of the whole order; if an order can mix
# products from several shops, Monetary includes the other shops' lines - confirm.

RFM = pd.DataFrame({
    'u_id': customer_stats['user_id'],
    # Whole days elapsed since the customer's latest payment.
    'Recency': (snapshot_time - customer_stats['last_purchase_time']).dt.days,
    'Frequency': customer_stats['Frequency'],
    'Monetary': customer_stats['Monetary'],
})
RFM

In [None]:
# Segmentation cut-offs: the mean of each RFM metric over all customers.
R_threshold, F_threshold, M_threshold = (
    RFM[metric].mean() for metric in ('Recency', 'Frequency', 'Monetary')
)
# Print the three cut-offs, one per line (R, F, M).
print(R_threshold, F_threshold, M_threshold, sep='\n')

In [None]:
# Binary score per metric, relative to its mean cut-off:
#   R is 1 when Recency is strictly below the mean (the customer bought more recently),
#   F and M are 1 when the value is strictly above the mean; otherwise 0.
RFM['R'] = RFM['Recency'].lt(R_threshold).astype(int)
RFM['F'] = RFM['Frequency'].gt(F_threshold).astype(int)
RFM['M'] = RFM['Monetary'].gt(M_threshold).astype(int)
RFM

In [None]:

# Concatenate the three binary scores into a 3-character class code, e.g. '101' (order: R, F, M).
score_r = RFM['R'].astype(str)
score_f = RFM['F'].astype(str)
score_m = RFM['M'].astype(str)
RFM['RFM_Class'] = score_r.str.cat([score_f, score_m])

# Human-readable (Chinese) segment name for each of the eight class codes.
rfm_labels = {
    '111': '重要价值客户',
    '110': '潜力客户',
    '101': '重要深耕客户',
    '100': '新客户',
    '011': '重要唤回客户',
    '010': '一般维持用户',
    '001': '重要挽留客户',
    '000': '流失用户'
}

# Look up the segment name of every customer from its class code.
RFM['RFM_Label'] = RFM['RFM_Class'].map(rfm_labels)

# Show only the customer id together with its class code and segment name.
RFM.loc[:, ['u_id', 'RFM_Class', 'RFM_Label']]