In [1]:
import sqlite3
import pandas as pd
import numpy as np
from typing import Dict, Any
from datetime import timedelta

In [2]:
def connect_to_db(db_path: str) -> sqlite3.Connection:
    """Open a SQLite connection with declared-type parsing enabled.

    Args:
        db_path: Path of the SQLite database file (or ``":memory:"``).

    Returns:
        A new ``sqlite3.Connection`` opened with
        ``detect_types=sqlite3.PARSE_DECLTYPES``, so values are passed through
        any converter registered for the column's declared type.
    """
    connection = sqlite3.connect(
        db_path,
        detect_types=sqlite3.PARSE_DECLTYPES,
    )
    return connection

def preprocess_datetime(df: pd.DataFrame) -> pd.DataFrame:
    """Convert the known timestamp columns of ``df`` to datetime, in place.

    Only the columns '创建时间', '预定日期' and '下单时间' are considered, and
    only when they exist and are not already a datetime dtype. Values that
    cannot be parsed become ``NaT`` (``errors='coerce'``).

    Args:
        df: Frame to normalise. It is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    for column in ('创建时间', '预定日期', '下单时间'):
        if column not in df.columns:
            continue
        values = df[column]
        if pd.api.types.is_datetime64_any_dtype(values):
            # Already datetime: nothing to convert.
            continue
        try:
            df[column] = pd.to_datetime(values, errors='coerce')
        except Exception as exc:
            # Best effort: report the failure and leave the column untouched.
            print(f"转换 {column} 列时出错：{exc}")
    return df

In [3]:
def get_db_connection(db_path: str = 'db/ideapod.db') -> sqlite3.Connection:
    """Open a SQLite connection whose rows are returned as ``sqlite3.Row``.

    Args:
        db_path: Path of the SQLite database file. Defaults to the project
            database ``'db/ideapod.db'`` so existing zero-argument calls keep
            working; pass another path (or ``":memory:"``) to target a
            different database.

    Returns:
        A new connection with ``row_factory`` set to ``sqlite3.Row``, which
        allows access to columns by name as well as by index. The caller is
        responsible for closing it.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    return conn


In [22]:
def analyze(conn):
    """Main analysis entry point.

    Loads every row of the ``Catering`` table, normalises its timestamp
    columns with ``preprocess_datetime`` and hands the full frame to
    ``product_analysis``.

    Args:
        conn: Open database connection accepted by ``pd.read_sql_query``.

    Returns:
        Whatever ``product_analysis(catering_df, conn)`` returns.
    """
    catering_df = pd.read_sql_query("SELECT * FROM Catering", conn)
    catering_df = preprocess_datetime(catering_df)

    # The full Catering frame is analysed. Earlier keyword/date filters were
    # computed here but never passed on, and could raise on NULL '商品' values,
    # so they are not evaluated any more.
    return product_analysis(catering_df, conn)

In [23]:
# Close the SQLite connection once the analysis has finished (or failed) so the
# database file is not left open by the kernel.
conn = get_db_connection()
try:
    print(analyze(conn))
finally:
    conn.close()

  product  quantity                订单日期 商品名 基础产品
0       1       1.0 2024-07-14 16:54:25   1   其他


In [26]:
def _parse_order_items(product_str, order_date) -> list:
    """Split one order string of ``<name>x<qty>`` items into record dicts.

    Items are separated by ','. Each item is split on its LAST 'x', so product
    names that themselves contain an 'x' are kept intact. An item without an
    'x' is ignored; an item whose quantity is not a number is reported and
    skipped without discarding the rest of the order.

    Args:
        product_str: Raw value of the '商品' column for one order.
        order_date: Value of the '下单时间' column for the same order.

    Returns:
        A list of ``{'product', 'quantity', '订单日期'}`` dicts (possibly empty).
    """
    try:
        items = product_str.split(',')
    except AttributeError as e:
        # Non-string cell (e.g. a NULL value): report it and contribute no rows.
        print(f"解析商品字符串时出错: {e}")
        return []

    records = []
    for item in items:
        name, separator, quantity_text = item.rpartition('x')
        if not separator:
            continue  # not in "<name>x<qty>" form
        try:
            quantity = float(quantity_text.strip())
        except ValueError as e:
            print(f"解析商品字符串时出错: {e}")
            continue
        records.append({'product': name.strip(), 'quantity': quantity, '订单日期': order_date})
    return records


def _weekly_top_product_sales(product_sales: pd.DataFrame, top_n: int = 20) -> pd.DataFrame:
    """Pivot weekly quantities for the ``top_n`` base products by total quantity.

    Returns one row per week ('订单周', as a string) and one column per base
    product, with 0 where a product had no sales in that week.
    """
    totals = product_sales.groupby('基础产品')['quantity'].sum()
    top_products = totals.nlargest(top_n).index

    # Work on a copy so adding columns never writes into a slice of the input.
    weekly = product_sales[product_sales['基础产品'].isin(top_products)].copy()
    weekly['订单日期'] = pd.to_datetime(weekly['订单日期'], errors='coerce')
    # Rows without a valid order date cannot be assigned to a week.
    weekly = weekly.dropna(subset=['订单日期'])

    # NOTE(review): 'W-MON' periods END on Monday, so the week label below is
    # the preceding Tuesday - confirm this is the intended week boundary.
    weekly['订单周'] = weekly['订单日期'].dt.to_period('W-MON').dt.start_time.dt.date

    pivot = weekly.groupby(['订单周', '基础产品'])['quantity'].sum().unstack(fill_value=0)
    pivot.columns = list(pivot.columns)  # plain column labels, no index name
    pivot = pivot.reset_index()
    pivot['订单周'] = pivot['订单周'].astype(str)
    return pivot


def product_analysis(catering_df: pd.DataFrame, conn, weekly: bool = False, top_n: int = 20) -> pd.DataFrame:
    """Product analysis: parse ordered items and attach their base product.

    Every '商品' string of ``catering_df`` is split into individual items,
    which are left-joined with the ``Product`` table ('商品名' -> '基础产品').

    If any parsed product name has no match in ``Product``, the unmatched
    names are written to 'db/ideapod_product_new.csv' and ``SystemExit(1)`` is
    raised so the new products can be registered before re-running.

    Args:
        catering_df: Orders with at least the columns '商品' and '下单时间'.
        conn: Open database connection accepted by ``pd.read_sql_query``.
        weekly: When False (default) return the item-level frame. When True
            return the weekly sales pivot of the top base products instead.
        top_n: Number of base products kept in the weekly pivot.

    Returns:
        Item-level frame with columns 'product', 'quantity', '订单日期',
        '商品名', '基础产品'; or, with ``weekly=True``, one row per week with
        one quantity column per base product.

    Raises:
        SystemExit: With code 1 when unmatched product names are found.
    """
    product_df = pd.read_sql_query("SELECT * FROM Product", conn)

    # Collect plain records and build the frame once; this also keeps empty
    # input well-defined (an empty frame with the expected columns).
    records = []
    for product_str, order_date in zip(catering_df['商品'], catering_df['下单时间']):
        records.extend(_parse_order_items(product_str, order_date))
    product_sales = pd.DataFrame(
        records, columns=['product', 'quantity', '订单日期']
    ).astype({'quantity': float})

    product_sales = pd.merge(product_sales, product_df[['商品名', '基础产品']],
                             left_on='product', right_on='商品名', how='left')

    # Product names that are not registered in the Product table yet.
    unmatched_products = product_sales.loc[product_sales['商品名'].isna(), 'product'].unique()

    if len(unmatched_products) > 0:
        new_products_df = pd.DataFrame({'product': unmatched_products})
        new_products_df.to_csv('db/ideapod_product_new.csv', index=False, encoding='utf-8-sig')
        print(f"发现 {len(unmatched_products)} 个新商品名，已保存到 db/ideapod_product_new.csv")
        print("程序已暂停执行，请处理新商品后重新运行")
        # Same effect as sys.exit(1), without a function-local import.
        raise SystemExit(1)
    print("没有新商品名")

    if weekly:
        return _weekly_top_product_sales(product_sales, top_n)
    return product_sales

In [27]:
# Close the SQLite connection even when the analysis stops early (it raises
# SystemExit when unregistered product names are found).
conn = get_db_connection()
try:
    print(analyze(conn))
finally:
    conn.close()

发现 44 个新商品名，已保存到 db/ideapod_product_new.csv
程序已暂停执行，请处理新商品后重新运行


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
