# 描述性数据

In [None]:
import sqlite3
import pandas as pd

def load_filtered_data(db_path, table_name, exclude_columns=None):
    """Load a SQLite table into a DataFrame, leaving out the given columns.

    Args:
        db_path: Path of the SQLite database file, passed to
            ``sqlite3.connect``.
        table_name: Name of the table to read. It is quoted as an SQL
            identifier, so names containing spaces or quotes are accepted.
        exclude_columns: Column names to omit from the result. ``None``
            (the default) keeps every column.

    Returns:
        A ``pandas.DataFrame`` with the remaining columns in table order.

    Raises:
        ValueError: If ``PRAGMA table_info`` reports no columns for the
            table (for example because it does not exist), or if every
            column of the table is excluded.
    """
    if exclude_columns is None:
        exclude_columns = ()

    def quote_identifier(name):
        # SQL identifiers are quoted with double quotes; an embedded double
        # quote is escaped by doubling it.
        return '"' + str(name).replace('"', '""') + '"'

    table_sql = quote_identifier(table_name)
    conn = sqlite3.connect(db_path)
    try:
        # Each row of PRAGMA table_info describes one column; index 1 is
        # the column name.
        info_rows = conn.execute(f"PRAGMA table_info({table_sql})").fetchall()
        columns = [row[1] for row in info_rows]
        if not columns:
            raise ValueError(
                f"table {table_name!r} not found or has no columns in {db_path!r}"
            )

        selected_columns = [col for col in columns if col not in exclude_columns]
        if not selected_columns:
            # An empty column list would produce invalid SQL ("SELECT  FROM ...").
            raise ValueError(
                f"all columns of table {table_name!r} were excluded"
            )

        cols_str = ', '.join(quote_identifier(col) for col in selected_columns)
        return pd.read_sql(f"SELECT {cols_str} FROM {table_sql}", conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()

def basic_stats(df):
    """Summarise the numeric columns of ``df``.

    Non-numeric columns are ignored. The result has one row per numeric
    column and four columns, in this order: non-null count, mean,
    standard deviation and median.
    """
    numeric_only = df.select_dtypes(include=['number'])

    # (output column label, per-column statistic), listed in output order.
    labelled_series = [
        ('非空值数', numeric_only.count()),
        ('平均数', numeric_only.mean()),
        ('标准差', numeric_only.std()),
        ('中位数', numeric_only.median()),
    ]

    return pd.concat(
        [series.rename(label) for label, series in labelled_series],
        axis=1,
    )

if __name__ == "__main__":
    # Script entry point: load one table, print summary statistics for its
    # numeric columns and save them to a CSV file next to the script.
    # Imported here because only this entry point needs it.
    import traceback

    db_path = "nomis_data.db"
    table_name = "ols_data_2024"
    output_path = f"{table_name}_核心统计.csv"

    try:
        # Leave the LAD25CD column out while loading.
        data = load_filtered_data(db_path, table_name, exclude_columns=['LAD25CD'])

        # Compute the statistics table.
        stats_result = basic_stats(data)

        # Show floats with two decimals when printing.
        pd.set_option('display.float_format', '{:.2f}'.format)
        print("===== 精简描述统计（已跳过LAD25CD）=====")
        print(stats_result)

        # Persist the result.
        stats_result.to_csv(output_path)
        print(f"\n结果已保存到: {output_path}")

    except Exception as e:
        print(f"执行出错: {e}")
        # The message alone does not say where the failure happened; the
        # traceback identifies the failing call.
        traceback.print_exc()

执行出错: can only concatenate str (not "int") to str
