In [None]:

# Notebook-wide setup: imports plus matplotlib font configuration so that
# Chinese text in titles, axis labels and tick labels is not rendered as
# garbled placeholder glyphs.
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Use SimHei as the sans-serif font (assumes SimHei is installed on this
# machine; otherwise matplotlib falls back to its default font).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative numbers with the ASCII hyphen instead of the Unicode minus
# sign, which presumably is missing from the CJK font selected above.
plt.rcParams['axes.unicode_minus'] = False


# 1. 加载数据
def load_data(file_path, **read_csv_kwargs):
    """Load a CSV file into a DataFrame and report the outcome on stdout.

    Args:
        file_path: Path of the CSV file to read.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv`` (for example ``encoding="gbk"`` or
            ``sep=";"``). When omitted, pandas' defaults apply, exactly as
            before this parameter existed.

    Returns:
        The loaded ``DataFrame``, or ``None`` if reading failed. Failures are
        printed, not raised, so callers must check for ``None`` before use.
    """
    try:
        data = pd.read_csv(file_path, **read_csv_kwargs)
        print(f"{os.path.basename(file_path)} 加载成功")
        return data
    except Exception as e:
        # Deliberately best-effort: one unreadable table is reported and
        # skipped so that the remaining tables can still be loaded.
        print(f"{os.path.basename(file_path)} 加载失败:", e)
        return None


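# 2. Data quality report (prints missing-value and duplicate-row counts; the data itself is returned unmodified)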
def clean_data(data):
    """Print a data-quality summary for ``data`` and return it unchanged.

    Two things are reported on stdout: the per-column count of missing
    values (only columns that have at least one), and the number of rows
    that duplicate an earlier row. No rows or values are altered.

    Args:
        data: The DataFrame to inspect.

    Returns:
        The same ``data`` object that was passed in.
    """
    # Per-column missing-value counts, restricted to the affected columns.
    null_counts = data.isna().sum()
    columns_with_nulls = null_counts[null_counts > 0]
    print("缺失值统计:\n", columns_with_nulls)

    # Rows that are exact repeats of a row appearing earlier in the frame.
    duplicate_row_count = data.duplicated().sum()
    print("重复值数量:", duplicate_row_count)

    return data


# Folder that holds the exported tables, and the table names in load order.
_data_dir = "电商平台行为数据表"
_table_names = ("用户行为表", "用户表", "用户评论表", "用户购买信息表", "订单表")

# Table name -> relative CSV path (each file is named after its table).
file_paths = {table: f"{_data_dir}/{table}.csv" for table in _table_names}

# Load every table in the order listed above; each name is bound to the
# value returned by load_data for the corresponding file.
(
    user_behavior,
    user_info,
    user_comments,
    purchase_info,
    orders,
) = (load_data(file_paths[table]) for table in _table_names)

In [None]:
# Print a structural summary of the orders table (columns, non-null counts, dtypes).
orders.info()
# Last expression of the cell, so its value (the first rows of the table) is
# what the notebook renders as the cell output.
orders.head()

In [None]:
# Select the order rows whose remark column ('备注') is neither missing
# nor an empty string.
has_remark = orders['备注'].notna() & orders['备注'].ne('')
orders_with_comments = orders.loc[has_remark]

# Cell output: show the orders that carry a remark.
orders_with_comments

In [None]:
# Distinct order numbers overall, and among the rows that carry a remark.
total_orders = orders['订单编号'].nunique()
orders_with_comments_count = orders_with_comments['订单编号'].nunique()

# Pie chart inputs: one slice per category, in the same order as `labels`.
labels = ['有备注的订单', '无备注的订单']
sizes = [
    orders_with_comments_count,
    total_orders - orders_with_comments_count,
]
colors = ['#ff9999', '#66b3ff']
explode = (0.1, 0)  # pull only the first slice (orders with a remark) outwards

plt.figure(figsize=(8, 6))
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',  # percentage label with one decimal place
    startangle=140,
    shadow=True,
)
plt.title('订单备注情况')
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

# Print the counts behind the chart.
print(f'总订单数: {total_orders}')
print(f'有备注的订单数: {orders_with_comments_count}')

In [None]:
#总销售额和订单数
total_orders = orders['订单编号'].nunique()
total_sales = (orders['价格'] * orders['购买数量']).sum()

print(f'总订单数: {total_orders}')
print(f'总销售额: {total_sales}')

In [None]:
# 订单状态分布
order_status_counts = orders['订单状态'].value_counts()

plt.figure(figsize=(10, 6))
sns.barplot(x=order_status_counts.index, y=order_status_counts.values, palette='viridis')
plt.title('订单状态分布')
plt.xlabel('订单状态')
plt.ylabel('数量')
plt.xticks(rotation=45)
plt.show()
successful_orders_count = orders[orders['订单状态'] == '交易成功']['订单编号'].nunique()
closed_orders_count = orders[orders['订单状态'] == '交易关闭']['订单编号'].nunique()

# 输出结果
print(f'交易成功的订单数: {successful_orders_count}')
print(f'已关闭的订单数: {closed_orders_count}')
print(f'交易失败率为：{closed_orders_count/total_orders*100}%')
print(f'转化率为：{100-closed_orders_count/total_orders*100}%')