In [1]:
import json
import sqlite3
from collections import defaultdict
from datetime import datetime
import pandas as pd
import logging

In [2]:
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('db/ideapod_group.log')
    ]
)

def preprocess_datetime(df: pd.DataFrame) -> pd.DataFrame:
    """Central datetime preprocessing to reduce redundant operations"""
    datetime_columns = [
        '创建时间', '预定开始时间', '预定结束时间', '实际结束时间',  # Space
        '下单时间',  # Catering
    ]
    for col in datetime_columns:
        if col in df.columns:
            if not pd.api.types.is_datetime64_any_dtype(df[col]):
                try:
                    df[col] = pd.to_datetime(df[col], errors='coerce')
                except Exception as e:
                    logging.error(f"转换 {col} 列时出错：{e}")
    return df

def convert_df_to_dict(data):
    if isinstance(data, pd.DataFrame):
        df = data.copy()
        # 处理时间类型列
        for col in df.columns:
            if pd.api.types.is_datetime64_any_dtype(df[col]) or pd.api.types.is_period_dtype(df[col]):
                df[col] = df[col].astype(str)
        # 处理时间类型的索引
        if pd.api.types.is_datetime64_any_dtype(df.index) or pd.api.types.is_period_dtype(df.index):
            df.index = df.index.astype(str)
        # 将 NaN 替换为 None，确保 JSON 中为 null
        return df.replace({np.nan: None}).to_dict(orient='records')
    elif isinstance(data, dict):
        return {k: convert_df_to_dict(v) for k, v in data.items()}
    elif isinstance(data, list):
        return [convert_df_to_dict(item) for item in data]
    elif isinstance(data, (pd.Timestamp, pd.Period, datetime.date)) or pd.isna(data):
        return str(data) if not pd.isna(data) else None
    return data

In [3]:
def get_db_connection():
    conn = sqlite3.connect('db/ideapod.db')
    conn.row_factory = sqlite3.Row
    return conn

In [23]:
def analyze(conn):
    """主分析函数"""
    try:
        catering_df = pd.read_sql_query("SELECT * FROM Catering", conn)
        space_df = pd.read_sql_query("SELECT * FROM Space", conn)
        
        # Preprocess datetime
        catering_df = preprocess_datetime(catering_df)
        space_df = preprocess_datetime(space_df)

        
       

        columns_to_keep = ['手机号','预定开始时间','实付金额','吧台场景收入','大众点评场景收入']
        
        space_df = space_df[space_df.columns[space_df.columns.isin(columns_to_keep)]]

        return space_df[space_df['预定开始时间'].dt.date==pd.to_datetime('2024-12-06').date()]
        
        # Filter data
        catering_df = catering_df.drop(catering_df[catering_df['服务方式'] == '报损'].index, inplace=False)
        
        # Add date columns
        catering_df['订单月'] = catering_df['下单时间'].dt.to_period('M')
        catering_df['订单周'] = catering_df['下单时间'].dt.to_period('W-MON').apply(lambda x: x.start_time.date())
        catering_df['订单日'] = catering_df['下单时间'].dt.date
        space_df['订单月'] = space_df['预定开始时间'].dt.to_period('M')
        space_df['订单周'] = space_df['预定开始时间'].dt.to_period('W-MON').apply(lambda x: x.start_time.date())
        space_df['订单日'] = space_df['预定开始时间'].dt.date


        
        
        financial_results = analyze_finance(space_df, catering_df)

        processed_results = {}
        for category, data in financial_results.items():
            processed_results[category] = {key: convert_df_to_dict(value) for key, value in data.items()}

        return processed_results

    except sqlite3.Error as e:
        return {'error': f"Database error: {e}"}
    except Exception as e:
        return {'error': f"An error occurred: {e}"}

In [24]:
def analyze_finance(space_df: pd.DataFrame, catering_df: pd.DataFrame) -> dict:
    """周度和日度财务分析"""
    
    # Daily analysis
    daily_catering = catering_df.groupby('订单日').agg(
        餐饮实收=('实收', 'sum')
    ).reset_index()
    
    daily_space = space_df.groupby('订单日').agg({
        '实付金额': 'sum',
        '吧台场景收入': 'sum',
        '大众点评场景收入': 'sum'
    }).reset_index()
    
    # Rename columns for clarity
    daily_space.columns = ['订单日', '场景收入', '吧台场景收入', '大众点评场景收入']
    
    daily_data = daily_catering.merge(daily_space, on='订单日', how='outer')
    
    daily_data = daily_data.fillna(0)
    # Calculate 餐饮收入 = 餐饮实收 - 吧台场景收入
    daily_data['餐饮收入'] = daily_data['餐饮实收'] - daily_data['吧台场景收入']
    daily_data = daily_data[['订单日', '餐饮收入', '场景收入', '餐饮实收', '吧台场景收入', '大众点评场景收入']]
    
    
    # Weekly analysis
    weekly_catering = catering_df.groupby('订单周').agg(
        餐饮实收=('实收', 'sum')
    ).reset_index()
    
    weekly_space = space_df.groupby('订单周').agg({
        '实付金额': 'sum',
        '吧台场景收入': 'sum',
        '大众点评场景收入': 'sum'
    }).reset_index()
    
    weekly_space.columns = ['订单周', '场景收入', '吧台场景收入', '大众点评场景收入']
    weekly_data = weekly_catering.merge(weekly_space, on='订单周', how='outer')
    weekly_data = weekly_data.fillna(0)
    weekly_data['餐饮收入'] = weekly_data['餐饮实收'] - weekly_data['吧台场景收入']
    
    # Convert to list of dicts for stacked result
    weekly_result = weekly_data[['订单周', '餐饮收入', '场景收入']].to_dict(orient='records')

    # Trailing 4 weeks analysis
    trailing_data = []
    for i in range(len(weekly_result)):
        space_trailing_4 = sum(d['场景收入'] for d in weekly_result[max(0, i-3):i+1])
        catering_trailing_4 = sum(d['餐饮收入'] for d in weekly_result[max(0, i-3):i+1])
        
        week_trailing = {
            "周": str(weekly_result[i]['订单周']),
            "场景_trailing_4_week收入": space_trailing_4,
            "餐饮_trailing_4_week收入": catering_trailing_4
        }
        
        # WoW calculation
        if i > 0:
            prev_space = trailing_data[i-1]["场景_trailing_4_week收入"]
            prev_catering = trailing_data[i-1]["餐饮_trailing_4_week收入"]
            week_trailing["场景_wow"] = (space_trailing_4 / prev_space - 1) * 100 if prev_space != 0 else 0
            week_trailing["餐饮_wow"] = (catering_trailing_4 / prev_catering - 1) * 100 if prev_catering != 0 else 0
        else:
            week_trailing["场景_wow"] = 0
            week_trailing["餐饮_wow"] = 0
        
        # MoM calculation
        if i >= 4:
            prev_4_space = trailing_data[i-4]["场景_trailing_4_week收入"]
            prev_4_catering = trailing_data[i-4]["餐饮_trailing_4_week收入"]
            week_trailing["场景_mom"] = (space_trailing_4 / prev_4_space - 1) * 100 if prev_4_space != 0 else 0
            week_trailing["餐饮_mom"] = (catering_trailing_4 / prev_4_catering - 1) * 100 if prev_4_catering != 0 else 0
        else:
            week_trailing["场景_mom"] = 0
            week_trailing["餐饮_mom"] = 0
        
        trailing_data.append(week_trailing)
    
    wow_result = [{"周": w["周"], "场景周环比": w["场景_wow"], "餐饮周环比": w["餐饮_wow"]} for w in trailing_data]
    mom_result = [{"周": w["周"], "场景月环比": w["场景_mom"], "餐饮月环比": w["餐饮_mom"]} for w in trailing_data]

    # Output structure
    output_data = {
        "周度销售收入_stacked": weekly_result,
        "过去四周收入周环比(%)_line": wow_result,
        "过去四周收入月环比(%)_line": mom_result,
        "日度销售收入_table": daily_data.to_dict(orient='records')
    }

    # Print results for reference
    print("分析完成")
    print("\n周度数据示例:", weekly_result[-1] if weekly_result else "无数据")
    # print("日度数据示例:", daily_data.iloc[-1].to_dict() if not daily_data.empty else "无数据")
    
    return {'集团财务': output_data}

In [25]:
print (analyze(get_db_connection()))

                  预定开始时间   实付金额  吧台场景收入  大众点评场景收入          手机号
2811 2024-12-06 20:14:00   50.0     0.0       0.0  13081523882
2812 2024-12-06 20:00:00   75.0     0.0       0.0  18802110728
2815 2024-12-06 18:54:00   15.0    15.0       0.0  18016323521
2816 2024-12-06 19:30:00  100.0   100.0       0.0  18121317625
2817 2024-12-06 18:42:00   50.0     0.0       0.0  18221681017
2819 2024-12-06 17:00:00    NaN     0.0       0.0  16621714319
2820 2024-12-06 15:43:00  120.0     0.0       0.0  13689011638
2821 2024-12-06 15:30:00  125.0   125.0       0.0  18621638488
2822 2024-12-06 16:00:00   55.0    55.0       0.0  13916564531
2824 2024-12-06 14:41:00  125.0   125.0       0.0  13501781988
2825 2024-12-06 15:00:00    NaN     0.0       0.0  19821808970
2826 2024-12-06 14:19:00   41.0     0.0       0.0  18565578310
2827 2024-12-06 14:00:00  300.0   300.0       0.0  18616506519
2828 2024-12-06 15:00:00  125.0     0.0     125.0  13754914320
2829 2024-12-06 13:21:00   40.0    40.0       0.0  1308

  df[col] = pd.to_datetime(df[col], errors='coerce')
